diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -43,6 +43,18 @@ }; typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; +/// Information about shadow pointers. +struct ShadowPtrInfoTy { + void **HstPtrAddr = nullptr; + void *HstPtrVal = nullptr; + void **TgtPtrAddr = nullptr; + void *TgtPtrVal = nullptr; +}; + +inline bool operator<(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) { + return lhs.HstPtrAddr < rhs.HstPtrAddr; +} + /// Map between host data and target data. struct HostDataToTargetTy { const uintptr_t HstPtrBase; // host info. @@ -79,9 +91,9 @@ uint64_t DynRefCount; uint64_t HoldRefCount; - /// Boolean flag to remember if any subpart of the mapped region might be - /// an attached pointer. - bool MayContainAttachedPointers; + /// A set of shadow pointers associated with this entry, ordered by host + /// pointer address so that stale entries can be identified. + std::set ShadowPtrInfos; /// This mutex will be locked when data movement is issued. For targets that /// doesn't support async data movement, this mutex can guarantee that after @@ -212,11 +224,28 @@ return ThisRefCount == 1; } - void setMayContainAttachedPointers() const { - States->MayContainAttachedPointers = true; + /// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the + /// target ptr value was not already present in the existing set of shadow + /// pointers. Return true if something was added. + bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const { + auto Pair = States->ShadowPtrInfos.emplace(ShadowPtrInfo); + if (Pair.second) + return true; + // Check for a stale entry, if found, remove the old one and recurse. 
+ if (Pair.first->TgtPtrVal == ShadowPtrInfo.TgtPtrVal) + return false; + States->ShadowPtrInfos.erase(Pair.first); + return addShadowPointer(ShadowPtrInfo); } - bool getMayContainAttachedPointers() const { - return States->MayContainAttachedPointers; + + /// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if + /// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns + /// OFFLOAD_SUCCESS. The entry is locked for this operation. + template int foreachShadowPointerInfo(CBTy CB) const { + for (auto &It : States->ShadowPtrInfos) + if (CB(It) == OFFLOAD_FAIL) + return OFFLOAD_FAIL; + return OFFLOAD_SUCCESS; } /// Increment the delete counter indicating that this thread plans to delete @@ -241,6 +270,7 @@ uintptr_t KeyValue; HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {} + HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {} HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT) : KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {} HostDataToTargetTy *HDTT; @@ -272,7 +302,10 @@ }; /// This struct will be returned by \p DeviceTy::getTargetPointer which provides -/// more data than just a target pointer. +/// more data than just a target pointer. A TargetPointerResultTy that has a non +/// null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists +/// the entry is locked. To give up ownership without destroying the TPR use the +/// reset() function. class TargetPointerResultTy { struct FlagTy { /// Flag indicating that the map table entry was just created. 
@@ -292,11 +325,36 @@ void *TargetPointer = nullptr; public: + TargetPointerResultTy(const TargetPointerResultTy &) = delete; + TargetPointerResultTy &operator=(const TargetPointerResultTy &TPR) = delete; + TargetPointerResultTy() {} TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry, void *TargetPointer) - : Flags(Flags), Entry(Entry), TargetPointer(TargetPointer) {} + : Flags(Flags), Entry(Entry), TargetPointer(TargetPointer) { + if (Entry) + Entry->lock(); + } + + TargetPointerResultTy(TargetPointerResultTy &&TPR) + : Flags(TPR.Flags), Entry(TPR.Entry), TargetPointer(TPR.TargetPointer) { + TPR.Entry = nullptr; + } + + TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) { + if (&TPR != this) { + std::swap(Flags, TPR.Flags); + std::swap(Entry, TPR.Entry); + std::swap(TargetPointer, TPR.TargetPointer); + } + return *this; + } + + ~TargetPointerResultTy() { + if (Entry) + Entry->unlock(); + } bool isHostPtr() const { return Flags.IsHostPointer; } void setIsHostPtr(bool IHP) { Flags.IsHostPointer = IHP; } @@ -311,16 +369,16 @@ void setTargetPointer(void *TP) { TargetPointer = TP; } HostDataToTargetTy *getEntry() const { return Entry; } - void setEntry(HostDataToTargetTy *HDTTT) { Entry = HDTTT; } -}; + void setEntry(HostDataToTargetTy *HDTTT, bool Lock = true) { + if (Entry) + Entry->unlock(); + Entry = HDTTT; + if (Entry && Lock) + Entry->lock(); + } -/// Map for shadow pointers -struct ShadowPtrValTy { - void *HstPtrVal; - void *TgtPtrAddr; - void *TgtPtrVal; + void reset() { *this = TargetPointerResultTy(); } }; -typedef std::map ShadowPtrListTy; /// struct PendingCtorDtorListsTy { @@ -354,9 +412,7 @@ PendingCtorsDtorsPerLibrary PendingCtorsDtors; - ShadowPtrListTy ShadowPtrMap; - - std::mutex PendingGlobalsMtx, ShadowMtx; + std::mutex PendingGlobalsMtx; // NOTE: Once libomp gains full target-task support, this state should be // moved into the target task in libomp. 
@@ -387,13 +443,13 @@ /// - Data allocation failed; /// - The user tried to do an illegal mapping; /// - Data transfer issue fails. - TargetPointerResultTy - getTargetPointer(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, - void *HstPtrBase, int64_t Size, map_var_info_t HstPtrName, - bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, - bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, - AsyncInfoTy &AsyncInfo); + TargetPointerResultTy getTargetPointer( + HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, + int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, + bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, + AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr, + bool ReleaseHDTTMap = true); /// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor /// ensures exclusive access to the HDTT map. @@ -441,10 +497,12 @@ // synchronous. 
// Copy data from host to device int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo); + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry = nullptr); // Copy data from device back to host int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo); + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry = nullptr); // Copy data from current device to destination device directly int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, AsyncInfoTy &AsyncInfo); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -51,8 +52,7 @@ DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), - HasPendingGlobals(false), PendingCtorsDtors(), ShadowPtrMap(), - PendingGlobalsMtx(), ShadowMtx() {} + HasPendingGlobals(false), PendingCtorsDtors(), PendingGlobalsMtx() {} DeviceTy::~DeviceTy() { if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) @@ -109,32 +109,35 @@ HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); auto It = HDTTMap->find(HstPtrBegin); - if (It != HDTTMap->end()) { - HostDataToTargetTy &HDTT = *It->HDTT; - // Mapping exists - if (HDTT.getHoldRefCount()) { - // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657: - // "It is an error to call acc_unmap_data if the structured reference - // count for the pointer is not zero." 
- REPORT("Trying to disassociate a pointer with a non-zero hold reference " - "count\n"); - } else if (HDTT.isDynRefCountInf()) { - DP("Association found, removing it\n"); - void *Event = HDTT.getEvent(); - delete &HDTT; - if (Event) - destroyEvent(Event); - HDTTMap->erase(It); - return OFFLOAD_SUCCESS; - } else { - REPORT("Trying to disassociate a pointer which was not mapped via " - "omp_target_associate_ptr\n"); - } - } else { + if (It == HDTTMap->end()) { REPORT("Association not found\n"); + return OFFLOAD_FAIL; + } + // Mapping exists + HostDataToTargetTy &HDTT = *It->HDTT; + std::unique_lock<HostDataToTargetTy> LG(HDTT); + if (HDTT.getHoldRefCount()) { + // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657: + // "It is an error to call acc_unmap_data if the structured reference + // count for the pointer is not zero." + REPORT("Trying to disassociate a pointer with a non-zero hold reference " + "count\n"); + return OFFLOAD_FAIL; } - // Mapping not found + if (HDTT.isDynRefCountInf()) { + DP("Association found, removing it\n"); + void *Event = HDTT.getEvent(); + LG.unlock(); // Must not unlock HDTT after it has been deleted. + delete &HDTT; + if (Event) + destroyEvent(Event); + HDTTMap->erase(It); + return OFFLOAD_SUCCESS; + } + + REPORT("Trying to disassociate a pointer which was not mapped via " + "omp_target_associate_ptr\n"); return OFFLOAD_FAIL; } @@ -212,12 +215,17 @@ HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) { + bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { TargetPointerResultTy TPR; LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - TPR.setEntry(LR.Entry); + // Release the mapping table lock only after the entry is locked by + // attaching it to TPR. 
Once TPR is destroyed it will release the lock + // on entry. If it is returned the lock will move to the returned object. + // If LR.Entry is already owned/locked we avoid trying to lock it again. + TPR.setEntry(LR.Entry, /* Lock */ LR.Entry != OwnedTPR); // Not that the entry can be null. HostDataToTargetTy *Entry = TPR.getEntry(); @@ -296,6 +304,8 @@ (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier, HstPtrName)) .first->HDTT; + // Release the mapping table lock only after the entry is locked by + // attaching it to TPR. TPR.setEntry(Entry); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, @@ -308,6 +318,11 @@ TPR.setTargetPointer((void *)Ptr); } + // All mapping table modifications have been made. If the user requested it we + // give up the lock. + if (ReleaseHDTTMap) + HDTTMap.destroy(); + // If the target pointer is valid, and we need to transfer data, issue the // data transfer. if (TPR.getTargetPointer() && !TPR.isHostPtr() && HasFlagTo && @@ -434,31 +449,33 @@ int DeviceTy::deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR, int64_t Size) { + HostDataToTargetTy &HT = *LR.Entry; + // Check if the pointer is contained in any sub-nodes. if (!(LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter)) { REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the" " allocated memory\n", - DPxPTR(LR.Entry->HstPtrBegin)); + DPxPTR(HT.HstPtrBegin)); return OFFLOAD_FAIL; } - auto &HT = *LR.Entry; - // Verify this thread is still in charge of deleting the entry. 
+ DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", + DPxPTR(HT.TgtPtrBegin), Size); + assert(HT.getTotalRefCount() == 0 && "Trying to delete entry that is in use or owned by another thread."); - DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", - DPxPTR(HT.TgtPtrBegin), Size); deleteData((void *)HT.TgtPtrBegin); + INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", Name=%s\n", DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size, (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str() : "unknown"); - void *Event = LR.Entry->getEvent(); + void *Event = HT.getEvent(); HDTTMap->erase(LR.Entry); - delete LR.Entry; + delete &HT; int Ret = OFFLOAD_SUCCESS; if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) { @@ -517,49 +534,56 @@ return RTL->data_delete(RTLDeviceID, TgtPtrBegin); } +static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin, + void *DstPtrBegin, int64_t Size, + HostDataToTargetTy *HT) { + + INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceId, + "Copying data from %s to %s, %sPtr=" DPxMOD ", %sPtr=" DPxMOD + ", Size=%" PRId64 ", Name=%s\n", + H2D ? "host" : "device", H2D ? "device" : "host", H2D ? "Hst" : "Tgt", + DPxPTR(SrcPtrBegin), H2D ? "Tgt" : "Hst", DPxPTR(DstPtrBegin), Size, + (HT && HT->HstPtrName) ? 
getNameFromMapping(HT->HstPtrName).c_str() + : "unknown"); +} + // Submit data to device int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo) { + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); - LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - auto *HT = &*LR.Entry; - - INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, - "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD - ", Size=%" PRId64 ", Name=%s\n", - DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size, - (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str() - : "unknown"); + if (!Entry) { + HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); + LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + Entry = LR.Entry; + } + printCopyInfo(DeviceID, /* H2D */ true, HstPtrBegin, TgtPtrBegin, Size, + Entry); } if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); - else - return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, - AsyncInfo); + return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, + AsyncInfo); } // Retrieve data from device int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, - int64_t Size, AsyncInfoTy &AsyncInfo) { + int64_t Size, AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); - LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - auto *HT = &*LR.Entry; - INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, - "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD - ", Size=%" PRId64 ", Name=%s\n", - DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size, - (HT && HT->HstPtrName) ? 
getNameFromMapping(HT->HstPtrName).c_str() - : "unknown"); + if (!Entry) { + HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); + LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + Entry = LR.Entry; + } + printCopyInfo(DeviceID, /* H2D */ false, TgtPtrBegin, HstPtrBegin, Size, + Entry); } - if (!RTL->data_retrieve_async || !RTL->synchronize) return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); - else - return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, - AsyncInfo); + return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, + AsyncInfo); } // Copy data from current device to destination device directly @@ -569,9 +593,9 @@ assert(RTL->data_exchange && "RTL->data_exchange is nullptr"); return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size); - } else - return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, - DstPtr, Size, AsyncInfo); + } + return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, + DstPtr, Size, AsyncInfo); } // Run region on device diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -503,7 +503,8 @@ HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr, /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef, - HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo); + HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo, + /* OwnedTPR */ nullptr, /* ReleaseHDTTMap */ false); PointerTgtPtrBegin = (void **)Pointer_TPR.getTargetPointer(); if (!PointerTgtPtrBegin) { REPORT("Call to getTargetPointer returned null pointer (%s).\n", @@ -526,10 +527,11 @@ const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO; const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; + // Note that HDTTMap will be released in 
getTargetPointer. auto TPR = Device.getTargetPointer( HDTTMap, HstPtrBegin, HstPtrBase, data_size, HstPtrName, HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, - HasPresentModifier, HasHoldModifier, AsyncInfo); + HasPresentModifier, HasHoldModifier, AsyncInfo, Pointer_TPR.getEntry()); void *TgtPtrBegin = TPR.getTargetPointer(); // If data_size==0, then the argument could be a zero-length pointer to @@ -552,39 +554,12 @@ } if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !TPR.isHostPtr()) { - // Check whether we need to update the pointer on the device - bool UpdateDevPtr = false; - uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); - Device.ShadowMtx.lock(); - auto Entry = Device.ShadowPtrMap.find(PointerHstPtrBegin); - // If this pointer is not in the map we need to insert it. If the map - // contains a stale entry, we need to update it (e.g. if the pointee was - // deallocated and later on is reallocated at another device address). The - // latter scenario is the subject of LIT test env/base_ptr_ref_count.c. An - // entry is removed from ShadowPtrMap only when the PTR of a PTR_AND_OBJ - // pair is deallocated, not when the OBJ is deallocated. In - // env/base_ptr_ref_count.c the PTR is a global "declare target" pointer, - // so it stays in the map for the lifetime of the application. When the - // OBJ is deallocated and later on allocated again (at a different device - // address), ShadowPtrMap still contains an entry for PointerHstPtrBegin - // which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ. 
- if (Entry == Device.ShadowPtrMap.end() || - Entry->second.TgtPtrVal != ExpectedTgtPtrBase) { - // create or update shadow pointers for this entry - Device.ShadowPtrMap[PointerHstPtrBegin] = { - HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase}; - Pointer_TPR.getEntry()->setMayContainAttachedPointers(); - UpdateDevPtr = true; - } - - if (UpdateDevPtr) { - std::lock_guard LG( - *Pointer_TPR.getEntry()); - Device.ShadowMtx.unlock(); - + if (Pointer_TPR.getEntry()->addShadowPointer( + ShadowPtrInfoTy{PointerHstPtrBegin, HstPtrBase, + PointerTgtPtrBegin, ExpectedTgtPtrBase})) { DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); @@ -592,7 +567,7 @@ TgtPtrBase = ExpectedTgtPtrBase; int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, - sizeof(void *), AsyncInfo); + sizeof(void *), AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; @@ -600,8 +575,7 @@ if (Pointer_TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } else - Device.ShadowMtx.unlock(); + } } } @@ -635,45 +609,6 @@ DelEntry(DelEntry), Entry(Entry) {} }; -/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to -/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the -/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the -/// rest of the map is not checked anymore. -template -static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin, - uintptr_t Size, HostDataToTargetTy *Entry) { - // If we have an object that is too small to hold a pointer subobject, no need - // to do any checking. - if (Size < sizeof(void *)) - return; - - // If the map entry for the object was never marked as containing attached - // pointers, no need to do any checking. 
- if (!Entry || !Entry->getMayContainAttachedPointers()) - return; - - uintptr_t LB = (uintptr_t)Begin; - uintptr_t UB = LB + Size; - // Now we are looking into the shadow map so we need to lock it. - std::lock_guard LG(Device.ShadowMtx); - for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin(); - Itr != Device.ShadowPtrMap.end();) { - uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first; - - // An STL map is sorted on its keys; use this property - // to quickly determine when to break out of the loop. - if (ShadowHstPtrAddr < LB) { - ++Itr; - continue; - } - if (ShadowHstPtrAddr >= UB) - break; - - if (CB(Itr) == OFFLOAD_FAIL) - break; - } -} - } // namespace /// Internal function to undo the mapping and retrieve the data from the device. @@ -809,7 +744,7 @@ } Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, - AsyncInfo); + AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; @@ -856,11 +791,13 @@ // If we marked the entry to be deleted we need to verify no other thread // reused it by now. If deletion is still supposed to happen by this thread // LR will be set and exclusive access to the HDTT map will avoid another - // thread reusing the entry now. Note that we do not request (exclusive) - // access to the HDTT map if Info.DelEntry is not set. - LookupResult LR; + // thread reusing the entry now. DeviceTy::HDTTMapAccessorTy HDTTMap = - Device.HostDataToTargetMap.getExclusiveAccessor(!Info.DelEntry); + Device.HostDataToTargetMap.getExclusiveAccessor(); + LookupResult LR; + + // We cannot use a lock guard because we may end up deleting the mutex. + Info.Entry->lock(); if (Info.DelEntry) { LR = Device.lookupMapping(HDTTMap, Info.HstPtrBegin, Info.DataSize); @@ -878,31 +815,23 @@ // need to restore the original host pointer values from their shadow // copies. If the struct is going to be deallocated, remove any remaining // shadow pointer entries for this struct. 
- auto CB = [&](ShadowPtrListTy::iterator &Itr) { + if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) { // If we copied the struct to the host, we need to restore the pointer. - if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) { - void **ShadowHstPtrAddr = (void **)Itr->first; - *ShadowHstPtrAddr = Itr->second.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD " for host " - "pointer " DPxMOD "\n", - DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); - } - // If the struct is to be deallocated, remove the shadow entry. - if (Info.DelEntry) { - DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)); - Itr = Device.ShadowPtrMap.erase(Itr); - } else { - ++Itr; - } - return OFFLOAD_SUCCESS; - }; - applyToShadowMapEntries(Device, CB, Info.HstPtrBegin, Info.DataSize, - Info.Entry); + Info.Entry->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD " for host " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); + } // If we are deleting the entry the DataMapMtx is locked and we own the // entry. if (!Info.DelEntry || (FromMapperBase && FromMapperBase == Info.HstPtrBegin)) { + Info.Entry->unlock(); continue; } @@ -948,49 +877,63 @@ if (ArgType & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo); + int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo, + TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; } - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - void **ShadowHstPtrAddr = (void **)Itr->first; - // Wait for device-to-host memcopies for whole struct to complete, - // before restoring the correct host pointer. 
- if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS) - return OFFLOAD_FAIL; - *ShadowHstPtrAddr = Itr->second.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD - " for host pointer " DPxMOD "\n", - DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); - ++Itr; - return OFFLOAD_SUCCESS; - }; - applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR.getEntry()); + if (TPR.getEntry()) { + int Ret = TPR.getEntry()->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + // Wait for device-to-host memcopies for whole struct to complete, + // before restoring the correct host pointer. + if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD + " for host pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); + if (Ret != OFFLOAD_SUCCESS) { + DP("Updating shadow map failed\n"); + return Ret; + } + } } if (ArgType & OMP_TGT_MAPTYPE_TO) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo); + int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo, + TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - DP("Restoring original target pointer value " DPxMOD " for target " - "pointer " DPxMOD "\n", - DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr)); - Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal, - sizeof(void *), AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) - REPORT("Copying data to device failed.\n"); - ++Itr; - return Ret; - }; - applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR.getEntry()); + if (TPR.getEntry()) { + int Ret = 
TPR.getEntry()->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + DP("Restoring original target pointer value " DPxMOD " for target " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr)); + int Ret = Device.submitData(ShadowPtr.TgtPtrAddr, + (void *)&ShadowPtr.TgtPtrVal, + sizeof(void *), AsyncInfo, TPR.getEntry()); + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; + }); + if (Ret != OFFLOAD_SUCCESS) { + DP("Updating shadow map failed\n"); + return Ret; + } + } } return OFFLOAD_SUCCESS; } @@ -1390,7 +1333,7 @@ DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, - sizeof(void *), AsyncInfo); + sizeof(void *), AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL;