diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -43,6 +43,14 @@ }; typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; +/// Information about shadow pointers. +struct ShadowPtrInfoTy { + void **HstPtrAddr = nullptr; + void *HstPtrVal = nullptr; + void **TgtPtrAddr = nullptr; + void *TgtPtrVal = nullptr; +}; + /// Map between host data and target data. struct HostDataToTargetTy { const uintptr_t HstPtrBase; // host info. @@ -79,9 +87,9 @@ uint64_t DynRefCount; uint64_t HoldRefCount; - /// Boolean flag to remember if any subpart of the mapped region might be - /// an attached pointer. - bool MayContainAttachedPointers; + /// A map of shadow pointers associated with this entry, the keys are host + /// pointer addresses to identify stale entries. + std::map ShadowPtrInfos; /// This mutex will be locked when data movement is issued. For targets that /// doesn't support async data movement, this mutex can guarantee that after @@ -212,11 +220,28 @@ return ThisRefCount == 1; } - void setMayContainAttachedPointers() const { - States->MayContainAttachedPointers = true; + /// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the + /// the target ptr value was not already present in the existing set of shadow + /// pointers. Return true if something was added. + bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const { + auto Pair = + States->ShadowPtrInfos.emplace(ShadowPtrInfo.HstPtrAddr, ShadowPtrInfo); + if (Pair.second) + return true; + if (Pair.first->second.TgtPtrVal == ShadowPtrInfo.TgtPtrVal) + return false; + Pair.first->second = ShadowPtrInfo; + return true; } - bool getMayContainAttachedPointers() const { - return States->MayContainAttachedPointers; + + /// Apply \p CB to all shadow pointers of this entry. 
Returns OFFLOAD_FAIL if + /// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns + /// OFFLOAD_SUCCESS. The entry is locked for this operation. + template int foreachShadowPointerInfo(CBTy CB) const { + for (auto &It : States->ShadowPtrInfos) + if (CB(It.second) == OFFLOAD_FAIL) + return OFFLOAD_FAIL; + return OFFLOAD_SUCCESS; } /// Increment the delete counter indicating that this thread plans to delete @@ -241,6 +266,7 @@ uintptr_t KeyValue; HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {} + HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {} HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT) : KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {} HostDataToTargetTy *HDTT; @@ -290,7 +316,6 @@ /// The corresponding target pointer void *TargetPointer = nullptr; -}; public: TargetPointerResultTy() {} @@ -298,6 +323,27 @@ TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry, void *TargetPointer) : Flags(Flags), Entry(Entry), TargetPointer(TargetPointer) { + if (Entry) + Entry->lock(); + } + + TargetPointerResultTy(TargetPointerResultTy &&TPR) + : Flags(TPR.Flags), Entry(TPR.Entry), TargetPointer(TPR.TargetPointer) { + TPR.Entry = nullptr; + } + + TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) { + if (&TPR != this) { + std::swap(Flags, TPR.Flags); + std::swap(Entry, TPR.Entry); + std::swap(TargetPointer, TPR.TargetPointer); + } + return *this; + } + + ~TargetPointerResultTy() { + if (Entry) + Entry->unlock(); } bool isHostPtr() const { return Flags.IsHostPointer; } @@ -313,7 +359,15 @@ void setTargetPointer(void *TP) { TargetPointer = TP; } HostDataToTargetTy *getEntry() const { return Entry; } - void setEntry(HostDataToTargetTy *HDTTT) { Entry = HDTTT; } + void setEntry(HostDataToTargetTy *HDTTT, bool Lock = true) { + if (Entry) + Entry->unlock(); + Entry = HDTTT; + if (Entry && Lock) + Entry->lock(); + } + + void reset() { *this = TargetPointerResultTy(); } }; /// @@ -348,9 +402,7 @@ PendingCtorsDtorsPerLibrary 
PendingCtorsDtors; - ShadowPtrListTy ShadowPtrMap; - - std::mutex PendingGlobalsMtx, ShadowMtx; + std::mutex PendingGlobalsMtx; // NOTE: Once libomp gains full target-task support, this state should be // moved into the target task in libomp. @@ -381,13 +433,13 @@ /// - Data allocation failed; /// - The user tried to do an illegal mapping; /// - Data transfer issue fails. - TargetPointerResultTy - getTargetPointer(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, - void *HstPtrBase, int64_t Size, map_var_info_t HstPtrName, - bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, - bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, - AsyncInfoTy &AsyncInfo); + TargetPointerResultTy getTargetPointer( + HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, + int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, + bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, + AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr, + bool ReleaseHDTTMap = true); /// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor /// ensures exclusive access to the HDTT map. @@ -435,10 +487,12 @@ // synchronous. 
// Copy data from host to device int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo); + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry = nullptr); // Copy data from device back to host int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo); + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry = nullptr); // Copy data from current device to destination device directly int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, AsyncInfoTy &AsyncInfo); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -51,8 +52,7 @@ DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), - HasPendingGlobals(false), PendingCtorsDtors(), ShadowPtrMap(), - PendingGlobalsMtx(), ShadowMtx() {} + HasPendingGlobals(false), PendingCtorsDtors(), PendingGlobalsMtx() {} DeviceTy::~DeviceTy() { if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) @@ -109,32 +109,35 @@ HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); auto It = HDTTMap->find(HstPtrBegin); - if (It != HDTTMap->end()) { - HostDataToTargetTy &HDTT = *It->HDTT; - // Mapping exists - if (HDTT.getHoldRefCount()) { - // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657: - // "It is an error to call acc_unmap_data if the structured reference - // count for the pointer is not zero." 
- REPORT("Trying to disassociate a pointer with a non-zero hold reference " - "count\n"); - } else if (HDTT.isDynRefCountInf()) { - DP("Association found, removing it\n"); - void *Event = HDTT.getEvent(); - delete &HDTT; - if (Event) - destroyEvent(Event); - HDTTMap->erase(It); - return OFFLOAD_SUCCESS; - } else { - REPORT("Trying to disassociate a pointer which was not mapped via " - "omp_target_associate_ptr\n"); - } - } else { + if (It == HDTTMap->end()) { REPORT("Association not found\n"); + return OFFLOAD_FAIL; + } + // Mapping exists + HostDataToTargetTy &HDTT = *It->HDTT; + std::lock_guard LG(HDTT); + + if (HDTT.getHoldRefCount()) { + // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657: + // "It is an error to call acc_unmap_data if the structured reference + // count for the pointer is not zero." + REPORT("Trying to disassociate a pointer with a non-zero hold reference " + "count\n"); + return OFFLOAD_FAIL; } - // Mapping not found + if (HDTT.isDynRefCountInf()) { + DP("Association found, removing it\n"); + void *Event = HDTT.getEvent(); + delete &HDTT; + if (Event) + destroyEvent(Event); + HDTTMap->erase(It); + return OFFLOAD_SUCCESS; + } + + REPORT("Trying to disassociate a pointer which was not mapped via " + "omp_target_associate_ptr\n"); return OFFLOAD_FAIL; } @@ -190,12 +193,17 @@ HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) { + bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { TargetPointerResultTy TPR; LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - TPR.setEntry(LR.Entry); + // Release the mapping table lock only after the entry is locked by + // attaching it to TPR. 
Once TPR is destroyed it will release the lock + // on entry. If it is returned the lock will move to the returned object. + // If LR.Entry is already owned/locked we avoid trying to lock it again. + TPR.setEntry(LR.Entry, /* Lock */ LR.Entry != OwnedTPR); // Not that the entry can be null. HostDataToTargetTy *Entry = TPR.getEntry(); @@ -274,6 +282,8 @@ (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier, HstPtrName)) .first->HDTT; + // Release the mapping table lock only after the entry is locked by + // attaching it to TPR. TPR.setEntry(Entry); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, @@ -286,6 +296,11 @@ TPR.setTargetPointer((void *)Ptr); } + // All mapping table modifications have been made. If the user requested it we + // give up the lock. + if (ReleaseHDTTMap) + HDTTMap.destroy(); + // If the target pointer is valid, and we need to transfer data, issue the // data transfer. if (TPR.getTargetPointer() && !TPR.isHostPtr() && HasFlagTo && @@ -413,32 +428,33 @@ int DeviceTy::deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR, int64_t Size) { + HostDataToTargetTy &HT = *LR.Entry; + // Check if the pointer is contained in any sub-nodes. if (!(LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter)) { REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the" " allocated memory\n", - DPxPTR(LR.Entry->HstPtrBegin)); + DPxPTR(HT.HstPtrBegin)); return OFFLOAD_FAIL; } - auto &HT = *LR.Entry; - // Verify this thread is still in charge of deleting the entry. 
+ DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", + DPxPTR(HT.TgtPtrBegin), Size); + assert(HT.getTotalRefCount() == 0 && - HT.getDeleteThreadId() == std::this_thread::get_id() && "Trying to delete entry that is in use or owned by another thread."); - DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", - DPxPTR(HT.TgtPtrBegin), Size); deleteData((void *)HT.TgtPtrBegin); + INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", Name=%s\n", DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size, (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str() : "unknown"); - void *Event = LR.Entry->getEvent(); + void *Event = HT.getEvent(); HDTTMap->erase(LR.Entry); - delete LR.Entry; + delete &HT; int Ret = OFFLOAD_SUCCESS; if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) { @@ -497,49 +513,56 @@ return RTL->data_delete(RTLDeviceID, TgtPtrBegin); } +static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin, + void *DstPtrBegin, int64_t Size, + HostDataToTargetTy *HT) { + + INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceId, + "Copying data from %s to %s, %sPtr=" DPxMOD ", %sPtr=" DPxMOD + ", Size=%" PRId64 ", Name=%s\n", + H2D ? "host" : "device", H2D ? "device" : "host", H2D ? "Hst" : "Tgt", + DPxPTR(SrcPtrBegin), H2D ? "Tgt" : "Hst", DPxPTR(DstPtrBegin), Size, + (HT && HT->HstPtrName) ? 
getNameFromMapping(HT->HstPtrName).c_str() + : "unknown"); +} + // Submit data to device int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo) { + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); - LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - auto *HT = &*LR.Entry; - - INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, - "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD - ", Size=%" PRId64 ", Name=%s\n", - DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size, - (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str() - : "unknown"); + if (!Entry) { + HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); + LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + Entry = LR.Entry; + } + printCopyInfo(DeviceID, /* H2D */ true, HstPtrBegin, TgtPtrBegin, Size, + Entry); } if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); - else - return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, - AsyncInfo); + return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, + AsyncInfo); } // Retrieve data from device int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, - int64_t Size, AsyncInfoTy &AsyncInfo) { + int64_t Size, AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); - LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - auto *HT = &*LR.Entry; - INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, - "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD - ", Size=%" PRId64 ", Name=%s\n", - DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size, - (HT && HT->HstPtrName) ? 
getNameFromMapping(HT->HstPtrName).c_str() - : "unknown"); + if (!Entry) { + HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); + LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + Entry = LR.Entry; + } + printCopyInfo(DeviceID, /* H2D */ false, TgtPtrBegin, HstPtrBegin, Size, + Entry); } - if (!RTL->data_retrieve_async || !RTL->synchronize) return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); - else - return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, - AsyncInfo); + return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, + AsyncInfo); } // Copy data from current device to destination device directly @@ -549,9 +572,9 @@ assert(RTL->data_exchange && "RTL->data_exchange is nullptr"); return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size); - } else - return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, - DstPtr, Size, AsyncInfo); + } + return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, + DstPtr, Size, AsyncInfo); } // Run region on device diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -503,7 +503,8 @@ HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr, /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef, - HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo); + HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo, + /* OwnedTPR */ nullptr, /* ReleaseHDTTMap */ false); PointerTgtPtrBegin = (void **)Pointer_TPR.getTargetPointer(); if (!PointerTgtPtrBegin) { REPORT("Call to getTargetPointer returned null pointer (%s).\n", @@ -526,10 +527,11 @@ const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO; const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; + // Note that HDTTMap will be released in 
getTargetPointer. auto TPR = Device.getTargetPointer( HDTTMap, HstPtrBegin, HstPtrBase, data_size, HstPtrName, HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, - HasPresentModifier, HasHoldModifier, AsyncInfo); + HasPresentModifier, HasHoldModifier, AsyncInfo, Pointer_TPR.getEntry()); void *TgtPtrBegin = TPR.getTargetPointer(); // If data_size==0, then the argument could be a zero-length pointer to @@ -555,32 +557,9 @@ uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); - Device.ShadowMtx.lock(); - auto Entry = Device.ShadowPtrMap.find(Pointer_HstPtrBegin); - // If this pointer is not in the map we need to insert it. If the map - // contains a stale entry, we need to update it (e.g. if the pointee was - // deallocated and later on is reallocated at another device address). The - // latter scenario is the subject of LIT test env/base_ptr_ref_count.c. An - // entry is removed from ShadowPtrMap only when the PTR of a PTR_AND_OBJ - // pair is deallocated, not when the OBJ is deallocated. In - // env/base_ptr_ref_count.c the PTR is a global "declare target" pointer, - // so it stays in the map for the lifetime of the application. When the - // OBJ is deallocated and later on allocated again (at a different device - // address), ShadowPtrMap still contains an entry for Pointer_HstPtrBegin - // which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ. 
- if (Entry == Device.ShadowPtrMap.end() || - Entry->second.TgtPtrVal != ExpectedTgtPtrBase) { - // create or update shadow pointers for this entry - Device.ShadowPtrMap[Pointer_HstPtrBegin] = { - HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase}; - Pointer_TPR.Entry->setMayContainAttachedPointers(); - UpdateDevPtr = true; - } - - if (UpdateDevPtr) { - std::lock_guard LG(*Pointer_TPR.Entry); - Device.ShadowMtx.unlock(); - + if (Pointer_TPR.getEntry()->addShadowPointer( + ShadowPtrInfoTy{PointerHstPtrBegin, HstPtrBase, + PointerTgtPtrBegin, ExpectedTgtPtrBase})) { DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); @@ -588,16 +567,15 @@ TgtPtrBase = ExpectedTgtPtrBase; int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, - sizeof(void *), AsyncInfo); + sizeof(void *), AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } - if (Pointer_TPR.Entry->addEventIfNecessary(Device, AsyncInfo) != + if (Pointer_TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } else - Device.ShadowMtx.unlock(); + } } } @@ -631,46 +609,6 @@ DelEntry(DelEntry), Entry(Entry) {} }; -/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to -/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the -/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the -/// rest of the map is not checked anymore. -template -static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin, - uintptr_t Size, - const TargetPointerResultTy &TPR) { - // If we have an object that is too small to hold a pointer subobject, no need - // to do any checking. - if (Size < sizeof(void *)) - return; - - // If the map entry for the object was never marked as containing attached - // pointers, no need to do any checking. 
- if (!TPR.Entry || !TPR.Entry->getMayContainAttachedPointers()) - return; - - uintptr_t LB = (uintptr_t)Begin; - uintptr_t UB = LB + Size; - // Now we are looking into the shadow map so we need to lock it. - std::lock_guard LG(Device.ShadowMtx); - for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin(); - Itr != Device.ShadowPtrMap.end();) { - uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first; - - // An STL map is sorted on its keys; use this property - // to quickly determine when to break out of the loop. - if (ShadowHstPtrAddr < LB) { - ++Itr; - continue; - } - if (ShadowHstPtrAddr >= UB) - break; - - if (CB(Itr) == OFFLOAD_FAIL) - break; - } -} - } // namespace /// Internal function to undo the mapping and retrieve the data from the device. @@ -806,7 +744,7 @@ } Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, - AsyncInfo); + AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; @@ -853,11 +791,13 @@ // If we marked the entry to be deleted we need to verify no other thread // reused it by now. If deletion is still supposed to happen by this thread // LR will be set and exclusive access to the HDTT map will avoid another - // thread reusing the entry now. Note that we do not request (exclusive) - // access to the HDTT map if Info.DelEntry is not set. - LookupResult LR; + // thread reusing the entry now. DeviceTy::HDTTMapAccessorTy HDTTMap = - Device.HostDataToTargetMap.getExclusiveAccessor(!Info.DelEntry); + Device.HostDataToTargetMap.getExclusiveAccessor(); + LookupResult LR; + + // We cannot use a lock guard because we may end up delete the mutex. + Info.Entry->lock(); if (Info.DelEntry) { LR = Device.lookupMapping(HDTTMap, Info.HstPtrBegin, Info.DataSize); @@ -875,31 +815,23 @@ // need to restore the original host pointer values from their shadow // copies. If the struct is going to be deallocated, remove any remaining // shadow pointer entries for this struct. 
- auto CB = [&](ShadowPtrListTy::iterator &Itr) { + if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) { // If we copied the struct to the host, we need to restore the pointer. - if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) { - void **ShadowHstPtrAddr = (void **)Itr->first; - *ShadowHstPtrAddr = Itr->second.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD " for host " - "pointer " DPxMOD "\n", - DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); - } - // If the struct is to be deallocated, remove the shadow entry. - if (Info.DelEntry) { - DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)); - Itr = Device.ShadowPtrMap.erase(Itr); - } else { - ++Itr; - } - return OFFLOAD_SUCCESS; - }; - applyToShadowMapEntries(Device, CB, Info.HstPtrBegin, Info.DataSize, - Info.TPR); + Info.Entry->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD " for host " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); + } // If we are deleting the entry the DataMapMtx is locked and we own the // entry. if (!Info.DelEntry || (FromMapperBase && FromMapperBase == Info.HstPtrBegin)) { + Info.Entry->unlock(); continue; } @@ -945,49 +877,63 @@ if (ArgType & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo); + int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo, + TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; } - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - void **ShadowHstPtrAddr = (void **)Itr->first; - // Wait for device-to-host memcopies for whole struct to complete, - // before restoring the correct host pointer. 
- if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS) - return OFFLOAD_FAIL; - *ShadowHstPtrAddr = Itr->second.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD - " for host pointer " DPxMOD "\n", - DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); - ++Itr; - return OFFLOAD_SUCCESS; - }; - applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR); + if (TPR.getEntry()) { + int Ret = TPR.getEntry()->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + // Wait for device-to-host memcopies for whole struct to complete, + // before restoring the correct host pointer. + if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD + " for host pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); + if (Ret != OFFLOAD_SUCCESS) { + DP("Updating shadow map failed\n"); + return Ret; + } + } } if (ArgType & OMP_TGT_MAPTYPE_TO) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo); + int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo, + TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - DP("Restoring original target pointer value " DPxMOD " for target " - "pointer " DPxMOD "\n", - DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr)); - Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal, - sizeof(void *), AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) - REPORT("Copying data to device failed.\n"); - ++Itr; - return Ret; - }; - applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR); + if (TPR.getEntry()) { + int Ret = TPR.getEntry()->foreachShadowPointerInfo( + 
[&](const ShadowPtrInfoTy &ShadowPtr) { + DP("Restoring original target pointer value " DPxMOD " for target " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr)); + Ret = Device.submitData(ShadowPtr.TgtPtrAddr, + (void *)&ShadowPtr.TgtPtrVal, + sizeof(void *), AsyncInfo, TPR.getEntry()); + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; + }); + if (Ret != OFFLOAD_SUCCESS) { + DP("Updating shadow map failed\n"); + return Ret; + } + } } return OFFLOAD_SUCCESS; } @@ -1387,7 +1333,7 @@ DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, - sizeof(void *), AsyncInfo); + sizeof(void *), AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL;