diff --git a/openmp/libomptarget/include/ExclusiveAccess.h b/openmp/libomptarget/include/ExclusiveAccess.h --- a/openmp/libomptarget/include/ExclusiveAccess.h +++ b/openmp/libomptarget/include/ExclusiveAccess.h @@ -48,6 +48,12 @@ /// Constructor to get exclusive access by taking it from \p Other. Accessor(Accessor &&Other) : Ptr(Other.Ptr) { Other.Ptr = nullptr; } + /// Move-assign to get exclusive access by taking it from \p Other. + Accessor &operator=(Accessor &&Other) { + Ptr = Other.Ptr; + Other.Ptr = nullptr; + } + Accessor(Accessor &Other) = delete; /// If the object is still owned when the lifetime ends we give up access. diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -13,6 +13,7 @@ #ifndef _OMPTARGET_DEVICE_H #define _OMPTARGET_DEVICE_H +#include #include #include #include @@ -43,6 +44,37 @@ }; typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; +struct ScopedAtomicCounter { +private: + std::atomic *Counter = nullptr; + +public: + ScopedAtomicCounter() = default; + + ScopedAtomicCounter(std::atomic &C) : Counter(&C) { + if (Counter) { + (*Counter)++; + assert(*Counter > 0); + } + } + + ~ScopedAtomicCounter() { + if (Counter) { + (*Counter)--; + assert(*Counter >= 0); + } + } + + ScopedAtomicCounter(ScopedAtomicCounter &&Other) : Counter(Other.Counter) { + Other.Counter = nullptr; + } + ScopedAtomicCounter &operator=(ScopedAtomicCounter &&Other) { + Counter = Other.Counter; + Other.Counter = nullptr; + return *this; + } +}; + /// Map between host data and target data. struct HostDataToTargetTy { const uintptr_t HstPtrBase; // host info. @@ -61,7 +93,7 @@ struct StatesTy { StatesTy(uint64_t DRC, uint64_t HRC) : DynRefCount(DRC), HoldRefCount(HRC), - MayContainAttachedPointers(false), DeleteThreadId(std::thread::id()) { + MayContainAttachedPointers(false), DeleterThreadCount(0) { } /// The dynamic reference count is the standard reference count as of OpenMP /// 4.5. The hold reference count is an OpenMP extension for the sake of @@ -101,13 +133,8 @@ /// should be written as void *Event[2]. void *Event = nullptr; - /// The id of the thread responsible for deleting this entry. This thread - /// set the reference count to zero *last*. Other threads might reuse the - /// entry while it is marked for deletion but not yet deleted (e.g., the - /// data is still being moved back). If another thread reuses the entry we - /// will have a non-zero reference count *or* the thread will have changed - /// this id, effectively taking over deletion responsibility. - std::thread::id DeleteThreadId; + /// TODO: Describe it! + std::atomic DeleterThreadCount; }; // When HostDataToTargetTy is used by std::set, std::set::iterator is const // use unique_ptr to make States mutable. @@ -148,13 +175,15 @@ /// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise. int addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const; - /// Indicate that the current thread expected to delete this entry. - void setDeleteThreadId() const { - States->DeleteThreadId = std::this_thread::get_id(); + /// TODO: Describe it! + int getDeleterThreadCount() { + return States->DeleterThreadCount; } - /// Return the thread id of the thread expected to delete this entry. - std::thread::id getDeleteThreadId() const { return States->DeleteThreadId; } + /// TODO: Describe it! + ScopedAtomicCounter getScopedDeleterThreadCounter() { + return ScopedAtomicCounter(States->DeleterThreadCount); + } /// Set the event bound to this data map. void setEvent(void *Event) const { States->Event = Event; } @@ -288,6 +317,9 @@ /// The corresponding target pointer void *TargetPointer = nullptr; + + /// TODO: Describe it! + ScopedAtomicCounter ThreadDeleteCounter = {}; }; /// Map for shadow pointers @@ -379,8 +411,10 @@ bool &IsLast, bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr, bool MustContain = false, - bool ForceDelete = false); + bool ForceDelete = false, + bool FromDataEnd = false); + /// TODO: Update it! /// Deallocate \p LR and remove the entry. Assume the total reference count is /// zero and the calling thread is the deleting thread for \p LR. \p HDTTMap /// ensure the caller holds exclusive access and can modify the map. Return \c @@ -388,7 +422,10 @@ /// not. It is the caller's responsibility to skip calling this function if /// the map entry is not expected to exist because \p HstPtrBegin uses shared /// memory. - int deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR, int64_t Size); + void eraseMapEntry(HDTTMapAccessorTy &HDTTMap, HostDataToTargetTy *Entry, int64_t Size); + + /// TODO: Describe it! + int deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int disassociatePtr(void *HstPtrBegin); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -360,9 +360,11 @@ TargetPointerResultTy DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool UseHoldRefCount, - bool &IsHostPtr, bool MustContain, bool ForceDelete) { + bool &IsHostPtr, bool MustContain, bool ForceDelete, + bool FromDataEnd) { HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); + ScopedAtomicCounter DeleterScopedCounter; void *TargetPointer = NULL; bool IsNew = false; IsHostPtr = false; @@ -380,15 +382,14 @@ "expected correct IsLast prediction for reset"); } + if (FromDataEnd) { + DeleterScopedCounter = LR.Entry->getScopedDeleterThreadCounter(); + } + const char *RefCountAction; if (!UpdateRefCount) { RefCountAction = " (update suppressed)"; } else if (IsLast) { - // Mark the entry as to be deleted by this thread. Another thread might - // reuse the entry and take "ownership" for the deletion while this thread - // is waiting for data transfers. That is fine and the current thread will - // simply skip the deletion step then. - HT.setDeleteThreadId(); HT.decRefCount(UseHoldRefCount); assert(HT.getTotalRefCount() == 0 && "Expected zero reference count when deletion is scheduled"); @@ -420,7 +421,10 @@ TargetPointer = HstPtrBegin; } - return {{IsNew, IsHostPtr}, LR.Entry, TargetPointer}; + return {{IsNew, IsHostPtr}, + LR.Entry, + TargetPointer, + std::move(DeleterScopedCounter)}; } // Return the target pointer begin (where the data will be moved). @@ -437,40 +441,36 @@ return NULL; } -int DeviceTy::deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR, - int64_t Size) { - // Check if the pointer is contained in any sub-nodes. - if (!(LR.Flags.IsContained || LR.Flags.ExtendsBefore || - LR.Flags.ExtendsAfter)) { - REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the" - " allocated memory\n", - DPxPTR(LR.Entry->HstPtrBegin)); - return OFFLOAD_FAIL; - } +void DeviceTy::eraseMapEntry(HDTTMapAccessorTy &HDTTMap, HostDataToTargetTy *Entry, int64_t Size) { + assert(Entry && "Trying to delete a null entry from the HDTT map."); - auto &HT = *LR.Entry; - // Verify this thread is still in charge of deleting the entry. - assert(HT.getTotalRefCount() == 0 && - HT.getDeleteThreadId() == std::this_thread::get_id() && + // Verify this thread is the only deleter holding a reference to the entry. + assert(Entry->getTotalRefCount() == 0 && Entry->getDeleterThreadCount() == 1 && "Trying to delete entry that is in use or owned by another thread."); - DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", - DPxPTR(HT.TgtPtrBegin), Size); - deleteData((void *)HT.TgtPtrBegin); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", Name=%s\n", - DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size, - (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str() : "unknown"); - void *Event = LR.Entry->getEvent(); - HDTTMap->erase(LR.Entry); - delete LR.Entry; + DPxPTR(Entry->HstPtrBegin), DPxPTR(Entry->TgtPtrBegin), Size, + (Entry->HstPtrName) ? getNameFromMapping(Entry->HstPtrName).c_str() : "unknown"); - int Ret = OFFLOAD_SUCCESS; + HDTTMap->erase(Entry); +} + +int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) { + assert(Entry && "Trying to deallocate a null entry."); + + DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", + DPxPTR(Entry->TgtPtrBegin), Size); + + void *Event = Entry->getEvent(); if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) { REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event)); - Ret = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } + + int Ret = deleteData((void *)Entry->TgtPtrBegin); + delete Entry; return Ret; } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -623,14 +623,10 @@ /// The target pointer information. TargetPointerResultTy TPR; - /// Are we expecting to delete this entry or not. Even if set, we might not - /// delete the entry if another thread reused the entry in the meantime. - bool DelEntry; - PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType, bool DelEntry, - TargetPointerResultTy TPR) - : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), TPR(TPR), - DelEntry(DelEntry) {} + TargetPointerResultTy &&TPR) + : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), + TPR(std::move(TPR)) {} }; /// Apply \p CB to the shadow map pointer entries in the range \p Begin, to @@ -673,6 +669,67 @@ } } +/// TODO: Describe it! +static int targetDataEndPostProcessing(DeviceTy &Device, void *FromMapperBase, + void *HstPtrBegin, int64_t DataSize, + int64_t ArgType, + TargetPointerResultTy TPR) { + int Ret = OFFLOAD_SUCCESS; + + // Only delete the entry when the total ref count is zero and the current + // thread is the sole deleter. + auto HDTTMap = Device.HostDataToTargetMap.getExclusiveAccessor(); + bool DelEntry = true; + if (TPR.Flags.IsHostPointer || TPR.Entry->getTotalRefCount() != 0 || + TPR.Entry->getDeleterThreadCount() != 1) { + HDTTMap.destroy(); + DelEntry = false; + } + + // If we copied back to the host a struct/array containing pointers, we + // need to restore the original host pointer values from their shadow + // copies. If the struct is going to be deallocated, remove any remaining + // shadow pointer entries for this struct. + auto CB = [&](ShadowPtrListTy::iterator &Itr) { + // If we copied the struct to the host, we need to restore the pointer. + if (ArgType & OMP_TGT_MAPTYPE_FROM) { + void **ShadowHstPtrAddr = (void **)Itr->first; + *ShadowHstPtrAddr = Itr->second.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD " for host " + "pointer " DPxMOD "\n", + DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); + } + // If the struct is to be deallocated, remove the shadow entry. + if (DelEntry) { + DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)); + auto OldItr = Itr; + Itr++; + Device.ShadowPtrMap.erase(OldItr); + } else { + ++Itr; + } + return OFFLOAD_SUCCESS; + }; + applyToShadowMapEntries(Device, CB, HstPtrBegin, DataSize, TPR); + + // If we are deleting the entry the DataMapMtx is locked and we own the + // entry. + if (DelEntry) { + if (!FromMapperBase || FromMapperBase != HstPtrBegin) { + Device.eraseMapEntry(HDTTMap, TPR.Entry, DataSize); + // Since all necessary operations on the HDTTMap are completed, there is + // no need to hold its lock any more. + HDTTMap.destroy(); + Ret = Device.deallocTgtPtrAndEntry(TPR.Entry, DataSize); + } + + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Deallocating data from device failed.\n"); + } + } + + return Ret; +} } // namespace /// Internal function to undo the mapping and retrieve the data from the device. @@ -738,11 +795,12 @@ bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE; bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT; bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD; + const bool FromDataEnd = true; // If PTR_AND_OBJ, HstPtrBegin is address of pointee TargetPointerResultTy TPR = Device.getTgtPtrBegin( HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr, - !IsImplicit, ForceDelete); + !IsImplicit, ForceDelete, FromDataEnd); void *TgtPtrBegin = TPR.TargetPointer; if (!TgtPtrBegin && (DataSize || HasPresentModifier)) { DP("Mapping does not exist (%s)\n", @@ -835,7 +893,7 @@ // Add pointer to the buffer for post-synchronize processing. PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I], - DelEntry && !IsHostPtr, TPR); + DelEntry && !IsHostPtr, std::move(TPR)); } } @@ -849,64 +907,13 @@ // Deallocate target pointer for (PostProcessingInfo &Info : PostProcessingPtrs) { - // If we marked the entry to be deleted we need to verify no other thread - // reused it by now. If deletion is still supposed to happen by this thread - // LR will be set and exclusive access to the HDTT map will avoid another - // thread reusing the entry now. Note that we do not request (exclusive) - // access to the HDTT map if Info.DelEntry is not set. - LookupResult LR; - DeviceTy::HDTTMapAccessorTy HDTTMap = - Device.HostDataToTargetMap.getExclusiveAccessor(!Info.DelEntry); - - if (Info.DelEntry) { - LR = Device.lookupMapping(HDTTMap, Info.HstPtrBegin, Info.DataSize); - if (LR.Entry->getTotalRefCount() != 0 || - LR.Entry->getDeleteThreadId() != std::this_thread::get_id()) { - // The thread is not in charge of deletion anymore. Give up access to - // the HDTT map and unset the deletion flag. - HDTTMap.destroy(); - Info.DelEntry = false; - } - } - - // If we copied back to the host a struct/array containing pointers, we - // need to restore the original host pointer values from their shadow - // copies. If the struct is going to be deallocated, remove any remaining - // shadow pointer entries for this struct. - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - // If we copied the struct to the host, we need to restore the pointer. - if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) { - void **ShadowHstPtrAddr = (void **)Itr->first; - *ShadowHstPtrAddr = Itr->second.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD " for host " - "pointer " DPxMOD "\n", - DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); - } - // If the struct is to be deallocated, remove the shadow entry. - if (Info.DelEntry) { - DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)); - auto OldItr = Itr; - Itr++; - Device.ShadowPtrMap.erase(OldItr); - } else { - ++Itr; - } - return OFFLOAD_SUCCESS; - }; - applyToShadowMapEntries(Device, CB, Info.HstPtrBegin, Info.DataSize, - Info.TPR); - - // If we are deleting the entry the DataMapMtx is locked and we own the - // entry. - if (Info.DelEntry) { - if (!FromMapperBase || FromMapperBase != Info.HstPtrBegin) - Ret = Device.deallocTgtPtr(HDTTMap, LR, Info.DataSize); - - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Deallocating data from device failed.\n"); - break; - } - } + // Move TPR to the inner scope. This way, scoped managed variables (e.g., + // Info.TPR.ThreadDeleteCounter) can be correctly deconstructed. + Ret = targetDataEndPostProcessing(Device, FromMapperBase, Info.HstPtrBegin, + Info.DataSize, Info.ArgType, + std::move(Info.TPR)); + if (Ret != OFFLOAD_SUCCESS) + break; } return Ret;