diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -60,7 +60,7 @@
struct StatesTy {
StatesTy(uint64_t DRC, uint64_t HRC)
: DynRefCount(DRC), HoldRefCount(HRC),
- MayContainAttachedPointers(false) {}
+ MayContainAttachedPointers(false), DeleteThreadId(-1) {}
/// The dynamic reference count is the standard reference count as of OpenMP
/// 4.5. The hold reference count is an OpenMP extension for the sake of
/// OpenACC support.
@@ -98,6 +98,14 @@
/// mechanism for D2H, and if the event cannot be shared between them, Event
/// should be written as void *Event[2].
void *Event = nullptr;
+
+ /// The id of the thread responsible for deleting this entry. This thread
+ /// set the reference count to zero *last*. Other threads might reuse the
+ /// entry while it is marked for deletion but not yet deleted (e.g., the
+ /// data is still being moved back). If another thread reuses the entry we
+ /// will have a non-zero reference count *or* the thread will have changed
+ /// this id, effectively taking over deletion responsibility.
+ int32_t DeleteThreadId;
};
// When HostDataToTargetTy is used by std::set, std::set::iterator is const
// use unique_ptr to make States mutable.
@@ -142,6 +150,12 @@
/// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise.
int waitEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const;
+ /// Indicate that \p TId is the thread expected to delete this entry.
+ void setDeleteThreadId(int32_t TId) const { States->DeleteThreadId = TId; }
+
+ /// Return the thread id of the thread expected to delete this entry.
+ int32_t getDeleteThreadId() const { return States->DeleteThreadId; }
+
/// Set the event bound to this data map.
void setEvent(void *Event) const { States->Event = Event; }
@@ -176,7 +190,7 @@
if (ThisRefCount > 0)
--ThisRefCount;
else
- assert(OtherRefCount > 0 && "total refcount underflow");
+ assert(OtherRefCount >= 0 && "total refcount underflow");
}
return getTotalRefCount();
}
@@ -366,14 +380,16 @@
bool UseHoldRefCount, bool &IsHostPtr,
bool MustContain = false,
bool ForceDelete = false);
- /// For the map entry for \p HstPtrBegin, decrement the reference count
- /// specified by \p HasHoldModifier and, if the the total reference count is
- /// then zero, deallocate the corresponding device storage and remove the map
- /// entry. Return \c OFFLOAD_SUCCESS if the map entry existed, and return
- /// \c OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling
- /// this function if the map entry is not expected to exist because
- /// \p HstPtrBegin uses shared memory.
- int deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool HasHoldModifier);
+
+ /// Deallocate \p LR and remove the entry. Assume the total reference count is
+ /// zero and the calling thread is the deleting thread for \p LR. \p HDTTMap
+ /// ensure the caller holds exclusive access and can modify the map. Return \c
+ /// OFFLOAD_SUCCESS if the map entry existed, and return \c OFFLOAD_FAIL if
+ /// not. It is the caller's responsibility to skip calling this function if
+ /// the map entry is not expected to exist because \p HstPtrBegin uses shared
+ /// memory.
+ int deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR, int64_t Size);
+
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
int disassociatePtr(void *HstPtrBegin);
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -223,9 +223,10 @@
((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) {
auto &HT = *LR.Entry;
const char *RefCountAction;
- assert(HT.getTotalRefCount() > 0 && "expected existing RefCount > 0");
if (UpdateRefCount) {
- // After this, RefCount > 1.
+ // After this, reference count >= 1. If the reference count was 0 but the
+ // entry was still there we can reuse the data on the device and avoid a
+ // new submission.
HT.incRefCount(HasHoldModifier);
RefCountAction = " (incremented)";
} else {
@@ -366,27 +367,30 @@
if (lr.Flags.IsContained ||
(!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter))) {
auto &HT = *lr.Entry;
- // We do not zero the total reference count here. deallocTgtPtr does that
- // atomically with removing the mapping. Otherwise, before this thread
- // removed the mapping in deallocTgtPtr, another thread could retrieve the
- // mapping, increment and decrement back to zero, and then both threads
- // would try to remove the mapping, resulting in a double free.
IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);
- const char *RefCountAction;
- if (!UpdateRefCount) {
- RefCountAction = " (update suppressed)";
- } else if (ForceDelete) {
+
+ if (ForceDelete) {
HT.resetRefCount(UseHoldRefCount);
assert(IsLast == HT.decShouldRemove(UseHoldRefCount) &&
"expected correct IsLast prediction for reset");
- if (IsLast)
- RefCountAction = " (reset, deferred final decrement)";
- else {
- HT.decRefCount(UseHoldRefCount);
- RefCountAction = " (reset)";
- }
+ }
+
+ const char *RefCountAction;
+ if (!UpdateRefCount) {
+ RefCountAction = " (update suppressed)";
} else if (IsLast) {
- RefCountAction = " (deferred final decrement)";
+ // Mark the entry as to be deleted by this thread. Another thread might
+ // reuse the entry and take "ownership" for the deletion while this thread
+ // is waiting for data transfers. That is fine and the current thread will
+ // simply skip the deletion step then.
+ HT.setDeleteThreadId(__kmpc_global_thread_num(NULL));
+ HT.decRefCount(UseHoldRefCount);
+ assert(HT.getTotalRefCount() == 0 &&
+ "Expected zero reference count when deletion is scheduled");
+ if (ForceDelete)
+ RefCountAction = " (reset, delayed deletion)";
+ else
+ RefCountAction = " (decremented, delayed deletion)";
} else {
HT.decRefCount(UseHoldRefCount);
RefCountAction = " (decremented)";
@@ -428,37 +432,37 @@
return NULL;
}
-int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size,
- bool HasHoldModifier) {
- HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
-
+int DeviceTy::deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR, int64_t Size) {
// Check if the pointer is contained in any sub-nodes.
- int Ret = OFFLOAD_SUCCESS;
- LookupResult lr = lookupMapping(HDTTMap, HstPtrBegin, Size);
- if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
- auto &HT = *lr.Entry;
- if (HT.decRefCount(HasHoldModifier) == 0) {
- DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
- DPxPTR(HT.TgtPtrBegin), Size);
- deleteData((void *)HT.TgtPtrBegin);
- INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
- "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
- ", Size=%" PRId64 ", Name=%s\n",
- DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size,
- (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str()
- : "unknown");
- void *Event = lr.Entry->getEvent();
- HDTTMap->erase(lr.Entry);
- delete lr.Entry;
- if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
- REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event));
- Ret = OFFLOAD_FAIL;
- }
- }
- } else {
+ if (!(LR.Flags.IsContained || LR.Flags.ExtendsBefore ||
+ LR.Flags.ExtendsAfter)) {
REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the"
" allocated memory\n",
- DPxPTR(HstPtrBegin));
+ DPxPTR(LR.Entry->HstPtrBegin));
+ return OFFLOAD_FAIL;
+ }
+
+ auto &HT = *LR.Entry;
+ // Verify this thread is still in charge of deleting the entry.
+ assert(HT.getTotalRefCount() == 0 &&
+ HT.getDeleteThreadId() == __kmpc_global_thread_num(nullptr) &&
+ "Trying to delete entry that is in use or owned by another thread.");
+
+ DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
+ DPxPTR(HT.TgtPtrBegin), Size);
+ deleteData((void *)HT.TgtPtrBegin);
+ INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
+ "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
+ ", Size=%" PRId64 ", Name=%s\n",
+ DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size,
+ (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str() : "unknown");
+ void *Event = LR.Entry->getEvent();
+ HDTTMap->erase(LR.Entry);
+ delete LR.Entry;
+
+ int Ret = OFFLOAD_SUCCESS;
+ if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
+ REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event));
Ret = OFFLOAD_FAIL;
}
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -607,17 +607,29 @@
namespace {
/// This structure contains information to deallocate a target pointer, aka.
-/// used to call the function \p DeviceTy::deallocTgtPtr.
-struct DeallocTgtPtrInfo {
+/// used to fix up the shadow map and potentially delete the entry from the
+/// mapping table via \p DeviceTy::deallocTgtPtr.
+struct PostProcessingInfo {
/// Host pointer used to look up into the map table
void *HstPtrBegin;
+
/// Size of the data
int64_t DataSize;
- /// Whether it has \p ompx_hold modifier
- bool HasHoldModifier;
- DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasHoldModifier)
- : HstPtrBegin(HstPtr), DataSize(Size), HasHoldModifier(HasHoldModifier) {}
+ /// The mapping type (bitfield).
+ int64_t ArgType;
+
+ /// The target pointer information.
+ TargetPointerResultTy TPR;
+
+ /// Are we expecting to delete this entry or not. Even if set, we might not
+ /// delete the entry if another thread reused the entry in the meantime.
+ bool DelEntry;
+
+ PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType, bool DelEntry,
+ TargetPointerResultTy TPR)
+ : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), TPR(TPR),
+ DelEntry(DelEntry) {}
};
/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
@@ -668,7 +680,7 @@
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
int Ret;
- std::vector DeallocTgtPtrs;
+ std::vector PostProcessingPtrs;
void *FromMapperBase = nullptr;
// process each input.
for (int32_t I = ArgNum - 1; I >= 0; --I) {
@@ -786,12 +798,33 @@
if ((Always || IsLast) && !IsHostPtr) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
+
+ std::lock_guard LG(*TPR.MapTableEntry);
+ // Wait for any previous transfer if an event is present.
+ if (void *Event = TPR.MapTableEntry->getEvent()) {
+ if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
+ REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
+ return OFFLOAD_FAIL;
+ }
+ }
+
Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize,
AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Copying data from device failed.\n");
return OFFLOAD_FAIL;
}
+
+ // As we are expecting to delete the entry the d2h copy might race
+ // with another one that also tries to delete the entry. This happens
+ // as the entry can be reused and the reuse might happen after the
+ // copy-back was issued but before it completed. Since the reuse might
+ // also copy-back a value we would race.
+ if (IsLast) {
+ if (TPR.MapTableEntry->recordEventIfNecessary(Device, AsyncInfo) !=
+ OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+ }
}
}
if (DelEntry && FromMapper && I == 0) {
@@ -799,38 +832,9 @@
FromMapperBase = HstPtrBegin;
}
- // If we copied back to the host a struct/array containing pointers, we
- // need to restore the original host pointer values from their shadow
- // copies. If the struct is going to be deallocated, remove any remaining
- // shadow pointer entries for this struct.
- auto CB = [&](ShadowPtrListTy::iterator &Itr) {
- // If we copied the struct to the host, we need to restore the pointer.
- if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
- void **ShadowHstPtrAddr = (void **)Itr->first;
- // Wait for device-to-host memcopies for whole struct to complete,
- // before restoring the correct host pointer.
- if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS)
- return OFFLOAD_FAIL;
- *ShadowHstPtrAddr = Itr->second.HstPtrVal;
- DP("Restoring original host pointer value " DPxMOD " for host "
- "pointer " DPxMOD "\n",
- DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
- }
- // If the struct is to be deallocated, remove the shadow entry.
- if (DelEntry) {
- DP("Removing shadow pointer " DPxMOD "\n",
- DPxPTR((void **)Itr->first));
- Itr = Device.ShadowPtrMap.erase(Itr);
- } else {
- ++Itr;
- }
- return OFFLOAD_SUCCESS;
- };
- applyToShadowMapEntries(Device, CB, HstPtrBegin, DataSize, TPR);
-
- // Add pointer to the buffer for later deallocation
- if (DelEntry && !IsHostPtr)
- DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasHoldModifier);
+ // Add pointer to the buffer for post-synchronize processing.
+ PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I],
+ DelEntry && !IsHostPtr, TPR);
}
}
@@ -843,18 +847,66 @@
return OFFLOAD_FAIL;
// Deallocate target pointer
- for (DeallocTgtPtrInfo &Info : DeallocTgtPtrs) {
- if (FromMapperBase && FromMapperBase == Info.HstPtrBegin)
- continue;
- Ret = Device.deallocTgtPtr(Info.HstPtrBegin, Info.DataSize,
- Info.HasHoldModifier);
- if (Ret != OFFLOAD_SUCCESS) {
- REPORT("Deallocating data from device failed.\n");
- return OFFLOAD_FAIL;
+ for (PostProcessingInfo &Info : PostProcessingPtrs) {
+ // If we marked the entry to be deleted we need to verify no other thread
+ // reused it by now. If deletion is still supposed to happen by this thread
+ // LR will be set and exclusive access to the HDTT map will avoid another
+ // thread reusing the entry now. Note that we do not request (exclusive)
+ // access to the HDTT map if Info.DelEntry is not set.
+ LookupResult LR;
+ DeviceTy::HDTTMapAccessorTy HDTTMap =
+ Device.HostDataToTargetMap.getExclusiveAccessor(!Info.DelEntry);
+
+ if (Info.DelEntry) {
+ LR = Device.lookupMapping(HDTTMap, Info.HstPtrBegin, Info.DataSize);
+ if (LR.Entry->getTotalRefCount() != 0 ||
+ LR.Entry->getDeleteThreadId() != __kmpc_global_thread_num(nullptr)) {
+ // The thread is not in charge of deletion anymore. Give up access to
+ // the HDTT map and unset the deletion flag.
+ HDTTMap.destroy();
+ Info.DelEntry = false;
+ }
+ }
+
+ // If we copied back to the host a struct/array containing pointers, we
+ // need to restore the original host pointer values from their shadow
+ // copies. If the struct is going to be deallocated, remove any remaining
+ // shadow pointer entries for this struct.
+ auto CB = [&](ShadowPtrListTy::iterator &Itr) {
+ // If we copied the struct to the host, we need to restore the pointer.
+ if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) {
+ void **ShadowHstPtrAddr = (void **)Itr->first;
+ *ShadowHstPtrAddr = Itr->second.HstPtrVal;
+ DP("Restoring original host pointer value " DPxMOD " for host "
+ "pointer " DPxMOD "\n",
+ DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
+ }
+ // If the struct is to be deallocated, remove the shadow entry.
+ if (Info.DelEntry) {
+ DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first));
+ Itr = Device.ShadowPtrMap.erase(Itr);
+ } else {
+ ++Itr;
+ }
+ return OFFLOAD_SUCCESS;
+ };
+ applyToShadowMapEntries(Device, CB, Info.HstPtrBegin, Info.DataSize,
+ Info.TPR);
+
+ // If we are deleting the entry the DataMapMtx is locked and we own the
+ // entry.
+ if (Info.DelEntry) {
+ if (!FromMapperBase || FromMapperBase != Info.HstPtrBegin)
+ Ret = Device.deallocTgtPtr(HDTTMap, LR, Info.DataSize);
+
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT("Deallocating data from device failed.\n");
+ break;
+ }
}
}
- return OFFLOAD_SUCCESS;
+ return Ret;
}
static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase,