diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -60,9 +60,7 @@ struct StatesTy { StatesTy(uint64_t DRC, uint64_t HRC) - : DynRefCount(DRC), HoldRefCount(HRC), - MayContainAttachedPointers(false), DeleteThreadId(std::thread::id()) { - } + : DynRefCount(DRC), HoldRefCount(HRC) {} /// The dynamic reference count is the standard reference count as of OpenMP /// 4.5. The hold reference count is an OpenMP extension for the sake of /// OpenACC support. @@ -101,13 +99,13 @@ /// should be written as void *Event[2]. void *Event = nullptr; - /// The id of the thread responsible for deleting this entry. This thread - /// set the reference count to zero *last*. Other threads might reuse the - /// entry while it is marked for deletion but not yet deleted (e.g., the - /// data is still being moved back). If another thread reuses the entry we - /// will have a non-zero reference count *or* the thread will have changed - /// this id, effectively taking over deletion responsibility. - std::thread::id DeleteThreadId; + /// Counter used to coordinate the deletion of an entry. Each thread that + /// decides the entry is to be deleted will increment the counter. As there + /// is a gap between deciding on deletion and actual deletion, e.g., to wait + /// for the copy back, the thread has to decrement the counter before + /// deletion is issued. Only if it is 0, and the ref count is 0 too, we + /// delete the entry. + unsigned short DeleteCount = 0; }; // When HostDataToTargetTy is used by std::set, std::set::iterator is const // use unique_ptr to make States mutable. @@ -148,14 +146,6 @@ /// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise. int addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const; - /// Indicate that the current thread expected to delete this entry. - void setDeleteThreadId() const { - States->DeleteThreadId = std::this_thread::get_id(); - } - - /// Return the thread id of the thread expected to delete this entry. - std::thread::id getDeleteThreadId() const { return States->DeleteThreadId; } - /// Set the event bound to this data map. void setEvent(void *Event) const { States->Event = Event; } @@ -229,6 +219,14 @@ return States->MayContainAttachedPointers; } + /// Increment the delete counter indicating that this thread plans to delete + /// the entry. + void incDeleteCount() const { ++States->DeleteCount; } + + /// Decrement the delete counter prior to deletion. If the return value is 0 + /// deletion should take place, otherwise the entry is still needed. + unsigned decDeleteCount() const { return --States->DeleteCount; } + void lock() const { States->UpdateMtx.lock(); } void unlock() const { States->UpdateMtx.unlock(); } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -618,17 +618,17 @@ /// The mapping type (bitfield). int64_t ArgType; - /// The target pointer information. - TargetPointerResultTy TPR; - /// Are we expecting to delete this entry or not. Even if set, we might not /// delete the entry if another thread reused the entry in the meantime. bool DelEntry; + /// The entry we currently copy back and which we might delete. + HostDataToTargetTy *Entry; + PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType, bool DelEntry, - TargetPointerResultTy TPR) - : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), TPR(TPR), - DelEntry(DelEntry) {} + HostDataToTargetTy *Entry) + : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), + DelEntry(DelEntry), Entry(Entry) {} }; /// Apply \p CB to the shadow map pointer entries in the range \p Begin, to @@ -830,8 +830,9 @@ } // Add pointer to the buffer for post-synchronize processing. - PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I], - DelEntry && !IsHostPtr, TPR); + if (auto *Entry = TPR.getEntry()) + PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I], + DelEntry && !TPR.isHostPtr(), Entry); } } @@ -844,7 +845,11 @@ return OFFLOAD_FAIL; // Deallocate target pointer + std::set DeletedEntries; for (PostProcessingInfo &Info : PostProcessingPtrs) { + if (DeletedEntries.count(Info.Entry)) + continue; + // If we marked the entry to be deleted we need to verify no other thread // reused it by now. If deletion is still supposed to happen by this thread // LR will be set and exclusive access to the HDTT map will avoid another @@ -856,8 +861,9 @@ if (Info.DelEntry) { LR = Device.lookupMapping(HDTTMap, Info.HstPtrBegin, Info.DataSize); + assert(LR.Entry == Info.Entry && "Entry disappeared unexpectedly"); if (LR.Entry->getTotalRefCount() != 0 || - LR.Entry->getDeleteThreadId() != std::this_thread::get_id()) { + LR.Entry->decDeleteCount() != 0) { // The thread is not in charge of deletion anymore. Give up access to // the HDTT map and unset the deletion flag. HDTTMap.destroy(); @@ -892,14 +898,19 @@ // If we are deleting the entry the DataMapMtx is locked and we own the // entry. - if (Info.DelEntry) { - if (!FromMapperBase || FromMapperBase != Info.HstPtrBegin) - Ret = Device.deallocTgtPtr(HDTTMap, LR, Info.DataSize); + if (!Info.DelEntry || + (FromMapperBase && FromMapperBase == Info.HstPtrBegin)) { + continue; + } - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Deallocating data from device failed.\n"); - break; - } + // Remember we deleted the entry in case it shows up in the + // PostProcessingPtrs again. + DeletedEntries.insert(Info.Entry); + auto Ret = Device.deallocTgtPtr(HDTTMap, LR, Info.DataSize); + + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Deallocating data from device failed.\n"); + break; } }