diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -61,8 +61,7 @@
struct StatesTy {
StatesTy(uint64_t DRC, uint64_t HRC)
: DynRefCount(DRC), HoldRefCount(HRC),
- MayContainAttachedPointers(false), DeleteThreadId(std::thread::id()) {
- }
+ MayContainAttachedPointers(false) {}
/// The dynamic reference count is the standard reference count as of OpenMP
/// 4.5. The hold reference count is an OpenMP extension for the sake of
/// OpenACC support.
@@ -101,13 +100,10 @@
/// should be written as void *Event[2].
void *Event = nullptr;
- /// The id of the thread responsible for deleting this entry. This thread
- /// set the reference count to zero *last*. Other threads might reuse the
- /// entry while it is marked for deletion but not yet deleted (e.g., the
- /// data is still being moved back). If another thread reuses the entry we
- /// will have a non-zero reference count *or* the thread will have changed
- /// this id, effectively taking over deletion responsibility.
- std::thread::id DeleteThreadId;
+ /// Number of threads currently holding a reference to the entry at a
+ /// targetDataEnd. This is used to ensure that only the last thread that
+ /// references this entry will actually delete it.
+ int32_t DataEndThreadCount = 0;
};
// When HostDataToTargetTy is used by std::set, std::set::iterator is const
// use unique_ptr to make States mutable.
@@ -148,13 +144,17 @@
/// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise.
int addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const;
- /// Indicate that the current thread expected to delete this entry.
- void setDeleteThreadId() const {
- States->DeleteThreadId = std::this_thread::get_id();
+ /// Functions that manages the number of threads referencing the entry in a
+ /// targetDataEnd.
+ void incDataEndThreadCount() { ++States->DataEndThreadCount; }
+
+ [[nodiscard]] int32_t decDataEndThreadCount() {
+ return --States->DataEndThreadCount;
}
- /// Return the thread id of the thread expected to delete this entry.
- std::thread::id getDeleteThreadId() const { return States->DeleteThreadId; }
+ [[nodiscard]] int32_t getDataEndThreadCount() const {
+ return States->DataEndThreadCount;
+ }
/// Set the event bound to this data map.
void setEvent(void *Event) const { States->Event = Event; }
@@ -381,16 +381,24 @@
bool &IsLast, bool UpdateRefCount,
bool UseHoldRefCount, bool &IsHostPtr,
bool MustContain = false,
- bool ForceDelete = false);
-
- /// Deallocate \p LR and remove the entry. Assume the total reference count is
- /// zero and the calling thread is the deleting thread for \p LR. \p HDTTMap
- /// ensure the caller holds exclusive access and can modify the map. Return \c
- /// OFFLOAD_SUCCESS if the map entry existed, and return \c OFFLOAD_FAIL if
- /// not. It is the caller's responsibility to skip calling this function if
- /// the map entry is not expected to exist because \p HstPtrBegin uses shared
- /// memory.
- int deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR, int64_t Size);
+ bool ForceDelete = false,
+ bool FromDataEnd = false);
+
+ /// Remove the \p Entry from the data map. Expect the entry's total reference
+ /// count to be zero and the caller thread to be the last one using it. \p
+ /// HDTTMap ensure the caller holds exclusive access and can modify the map.
+ /// Return \c OFFLOAD_SUCCESS if the map entry existed, and return \c
+ /// OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling
+ /// this function if the map entry is not expected to exist because \p
+ /// HstPtrBegin uses shared memory.
+ [[nodiscard]] int eraseMapEntry(HDTTMapAccessorTy &HDTTMap,
+ HostDataToTargetTy *Entry, int64_t Size);
+
+ /// Deallocate the \p Entry from the device memory and delete it. Return \c
+ /// OFFLOAD_SUCCESS if the deallocation operations executed successfully, and
+ /// return \c OFFLOAD_FAIL otherwise.
+ [[nodiscard]] int deallocTgtPtrAndEntry(HostDataToTargetTy *Entry,
+ int64_t Size);
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
int disassociatePtr(void *HstPtrBegin);
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -365,7 +365,8 @@
TargetPointerResultTy
DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
bool UpdateRefCount, bool UseHoldRefCount,
- bool &IsHostPtr, bool MustContain, bool ForceDelete) {
+ bool &IsHostPtr, bool MustContain, bool ForceDelete,
+ bool FromDataEnd) {
HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
void *TargetPointer = NULL;
@@ -386,15 +387,18 @@
"expected correct IsLast prediction for reset");
}
+ // Increment the number of threads that is using the entry on a
+ // targetDataEnd, tracking the number of possible "deleters". A thread may
+ // come to own the entry deletion even if it was not the last one querying
+ // for it. Thus, we must track every query on targetDataEnds to ensure only
+ // the last thread that holds a reference to an entry actually deletes it.
+ if (FromDataEnd)
+ HT.incDataEndThreadCount();
+
const char *RefCountAction;
if (!UpdateRefCount) {
RefCountAction = " (update suppressed)";
} else if (IsLast) {
- // Mark the entry as to be deleted by this thread. Another thread might
- // reuse the entry and take "ownership" for the deletion while this thread
- // is waiting for data transfers. That is fine and the current thread will
- // simply skip the deletion step then.
- HT.setDeleteThreadId();
HT.decRefCount(UseHoldRefCount);
assert(HT.getTotalRefCount() == 0 &&
"Expected zero reference count when deletion is scheduled");
@@ -450,41 +454,42 @@
return NULL;
}
-int DeviceTy::deallocTgtPtr(HDTTMapAccessorTy &HDTTMap, LookupResult LR,
- int64_t Size) {
- // Check if the pointer is contained in any sub-nodes.
- if (!(LR.Flags.IsContained || LR.Flags.ExtendsBefore ||
- LR.Flags.ExtendsAfter)) {
- REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the"
- " allocated memory\n",
- DPxPTR(LR.Entry->HstPtrBegin));
- return OFFLOAD_FAIL;
- }
-
- auto &HT = *LR.Entry;
- // Verify this thread is still in charge of deleting the entry.
- assert(HT.getTotalRefCount() == 0 &&
- HT.getDeleteThreadId() == std::this_thread::get_id() &&
+int DeviceTy::eraseMapEntry(HDTTMapAccessorTy &HDTTMap,
+ HostDataToTargetTy *Entry, int64_t Size) {
+ assert(Entry && "Trying to delete a null entry from the HDTT map.");
+ assert(Entry->getTotalRefCount() == 0 && Entry->getDataEndThreadCount() == 0 &&
"Trying to delete entry that is in use or owned by another thread.");
- DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
- DPxPTR(HT.TgtPtrBegin), Size);
- deleteData((void *)HT.TgtPtrBegin);
INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
"Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
", Size=%" PRId64 ", Name=%s\n",
- DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size,
- (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str() : "unknown");
- void *Event = LR.Entry->getEvent();
- HDTTMap->erase(LR.Entry);
- delete LR.Entry;
+ DPxPTR(Entry->HstPtrBegin), DPxPTR(Entry->TgtPtrBegin), Size,
+ (Entry->HstPtrName) ? getNameFromMapping(Entry->HstPtrName).c_str()
+ : "unknown");
+
+ if (HDTTMap->erase(Entry) == 0) {
+ REPORT("Trying to remove a non-existent map entry\n");
+ return OFFLOAD_FAIL;
+ }
- int Ret = OFFLOAD_SUCCESS;
+ return OFFLOAD_SUCCESS;
+}
+
+int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) {
+ assert(Entry && "Trying to deallocate a null entry.");
+
+ DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
+ DPxPTR(Entry->TgtPtrBegin), Size);
+
+ void *Event = Entry->getEvent();
if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event));
- Ret = OFFLOAD_FAIL;
+ return OFFLOAD_FAIL;
}
+ int Ret = deleteData((void *)Entry->TgtPtrBegin);
+ delete Entry;
+
return Ret;
}
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -677,17 +677,16 @@
/// The mapping type (bitfield).
int64_t ArgType;
+ /// Index of the argument in the data mapping scheme.
+ int32_t ArgIndex;
+
/// The target pointer information.
TargetPointerResultTy TPR;
- /// Are we expecting to delete this entry or not. Even if set, we might not
- /// delete the entry if another thread reused the entry in the meantime.
- bool DelEntry;
-
- PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType, bool DelEntry,
- TargetPointerResultTy TPR)
- : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), TPR(TPR),
- DelEntry(DelEntry) {}
+ PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType,
+ int32_t ArgIndex, TargetPointerResultTy TPR)
+ : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType),
+ ArgIndex(ArgIndex), TPR(TPR) {}
};
/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
@@ -737,42 +736,55 @@
/// data end. This includes the update of pointers at the host and removal of
/// device buffer when needed. It returns OFFLOAD_FAIL or OFFLOAD_SUCCESS
/// according to the successfulness of the operations.
-static int
+[[nodiscard]] static int
postProcessingTargetDataEnd(DeviceTy *Device,
SmallVector EntriesInfo,
- void *FromMapperBase) {
+ bool FromMapper) {
int Ret = OFFLOAD_SUCCESS;
+ void *FromMapperBase = nullptr;
+
+ for (auto &[HstPtrBegin, DataSize, ArgType, ArgIndex, TPR] : EntriesInfo) {
+ bool DelEntry = !TPR.isHostPointer();
+
+ // If the last element from the mapper (for end transfer args comes in
+ // reverse order), do not remove the partial entry, the parent struct still
+ // exists.
+ if ((ArgType & OMP_TGT_MAPTYPE_MEMBER_OF) &&
+ !(ArgType & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
+ DelEntry = false; // protect parent struct from being deallocated
+ }
+
+ if (DelEntry && FromMapper && ArgIndex == 0) {
+ DelEntry = false;
+ FromMapperBase = HstPtrBegin;
+ }
- for (PostProcessingInfo &Info : EntriesInfo) {
// If we marked the entry to be deleted we need to verify no other
// thread reused it by now. If deletion is still supposed to happen by
// this thread LR will be set and exclusive access to the HDTT map
// will avoid another thread reusing the entry now. Note that we do
- // not request (exclusive) access to the HDTT map if Info.DelEntry is
+ // not request (exclusive) access to the HDTT map if DelEntry is
// not set.
- LookupResult LR;
DeviceTy::HDTTMapAccessorTy HDTTMap =
- Device->HostDataToTargetMap.getExclusiveAccessor(!Info.DelEntry);
-
- if (Info.DelEntry) {
- LR = Device->lookupMapping(HDTTMap, Info.HstPtrBegin, Info.DataSize);
- if (LR.Entry->getTotalRefCount() != 0 ||
- LR.Entry->getDeleteThreadId() != std::this_thread::get_id()) {
- // The thread is not in charge of deletion anymore. Give up access
- // to the HDTT map and unset the deletion flag.
- HDTTMap.destroy();
- Info.DelEntry = false;
- }
+ Device->HostDataToTargetMap.getExclusiveAccessor(!DelEntry);
+
+ const bool IsNotLastUser = TPR.Entry->decDataEndThreadCount() != 0;
+ if (DelEntry && (TPR.Entry->getTotalRefCount() != 0 || IsNotLastUser)) {
+ // The thread is not in charge of deletion anymore. Give up access
+ // to the HDTT map and unset the deletion flag.
+ HDTTMap.destroy();
+ DelEntry = false;
}
// If we copied back to the host a struct/array containing pointers,
// we need to restore the original host pointer values from their
// shadow copies. If the struct is going to be deallocated, remove any
// remaining shadow pointer entries for this struct.
+ const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
auto CB = [&](ShadowPtrListTy::iterator &Itr) {
// If we copied the struct to the host, we need to restore the
// pointer.
- if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) {
+ if (HasFrom) {
void **ShadowHstPtrAddr = (void **)Itr->first;
*ShadowHstPtrAddr = Itr->second.HstPtrVal;
DP("Restoring original host pointer value " DPxMOD " for host "
@@ -780,7 +792,7 @@
DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
}
// If the struct is to be deallocated, remove the shadow entry.
- if (Info.DelEntry) {
+ if (DelEntry) {
DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first));
auto OldItr = Itr;
Itr++;
@@ -790,19 +802,20 @@
}
return OFFLOAD_SUCCESS;
};
- applyToShadowMapEntries(*Device, CB, Info.HstPtrBegin, Info.DataSize,
- Info.TPR);
+ applyToShadowMapEntries(*Device, CB, HstPtrBegin, DataSize, TPR);
+
+ if (!DelEntry || (FromMapperBase && FromMapperBase == HstPtrBegin))
+ continue;
// If we are deleting the entry the DataMapMtx is locked and we own
// the entry.
- if (Info.DelEntry) {
- if (!FromMapperBase || FromMapperBase != Info.HstPtrBegin)
- Ret = Device->deallocTgtPtr(HDTTMap, LR, Info.DataSize);
-
- if (Ret != OFFLOAD_SUCCESS) {
- REPORT("Deallocating data from device failed.\n");
- break;
- }
+ Ret = Device->eraseMapEntry(HDTTMap, TPR.Entry, DataSize);
+ // Entry is already remove from the map, we can unlock it now.
+ HDTTMap.destroy();
+ Ret |= Device->deallocTgtPtrAndEntry(TPR.Entry, DataSize);
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT("Deallocating data from device failed.\n");
+ break;
}
}
@@ -872,11 +885,12 @@
bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;
bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
+ constexpr bool FromDataEnd = true;
// If PTR_AND_OBJ, HstPtrBegin is address of pointee
TargetPointerResultTy TPR = Device.getTgtPtrBegin(
HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,
- !IsImplicit, ForceDelete);
+ !IsImplicit, ForceDelete, FromDataEnd);
void *TgtPtrBegin = TPR.TargetPointer;
if (!TPR.isPresent() && !TPR.isHostPointer() &&
(DataSize || HasPresentModifier)) {
@@ -917,61 +931,42 @@
if (!TPR.isPresent())
continue;
- bool DelEntry = IsLast;
-
- // If the last element from the mapper (for end transfer args comes in
- // reverse order), do not remove the partial entry, the parent struct still
- // exists.
- if ((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
- !(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
- DelEntry = false; // protect parent struct from being deallocated
- }
-
- if ((ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
- // Move data back to the host
- if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
- bool Always = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
- if ((Always || IsLast) && !IsHostPtr) {
- DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
- DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
-
- std::lock_guard LG(*TPR.Entry);
- // Wait for any previous transfer if an event is present.
- if (void *Event = TPR.Entry->getEvent()) {
- if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
- REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
- return OFFLOAD_FAIL;
- }
- }
-
- Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize,
- AsyncInfo);
- if (Ret != OFFLOAD_SUCCESS) {
- REPORT("Copying data from device failed.\n");
- return OFFLOAD_FAIL;
- }
-
- // As we are expecting to delete the entry the d2h copy might race
- // with another one that also tries to delete the entry. This happens
- // as the entry can be reused and the reuse might happen after the
- // copy-back was issued but before it completed. Since the reuse might
- // also copy-back a value we would race.
- if (IsLast) {
- if (TPR.Entry->addEventIfNecessary(Device, AsyncInfo) !=
- OFFLOAD_SUCCESS)
- return OFFLOAD_FAIL;
- }
+ // Move data back to the host
+ const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
+ const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM;
+ if (HasFrom && (HasAlways || IsLast) && !IsHostPtr) {
+ DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
+ DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
+
+ std::lock_guard LG(*TPR.Entry);
+ // Wait for any previous transfer if an event is present.
+ if (void *Event = TPR.Entry->getEvent()) {
+ if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
+ REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
+ return OFFLOAD_FAIL;
}
}
- if (DelEntry && FromMapper && I == 0) {
- DelEntry = false;
- FromMapperBase = HstPtrBegin;
+
+ Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo);
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT("Copying data from device failed.\n");
+ return OFFLOAD_FAIL;
}
- // Add pointer to the buffer for post-synchronize processing.
- PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I],
- DelEntry && !IsHostPtr, TPR);
+ // As we are expecting to delete the entry the d2h copy might race
+ // with another one that also tries to delete the entry. This happens
+ // as the entry can be reused and the reuse might happen after the
+ // copy-back was issued but before it completed. Since the reuse might
+ // also copy-back a value we would race.
+ if (IsLast) {
+ if (TPR.Entry->addEventIfNecessary(Device, AsyncInfo) !=
+ OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+ }
}
+
+ // Add pointer to the buffer for post-synchronize processing.
+ PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I], I, TPR);
}
// Add post-processing functions
diff --git a/openmp/libomptarget/test/mapping/map_back_race.cpp b/openmp/libomptarget/test/mapping/map_back_race.cpp
--- a/openmp/libomptarget/test/mapping/map_back_race.cpp
+++ b/openmp/libomptarget/test/mapping/map_back_race.cpp
@@ -2,19 +2,6 @@
// Taken from https://github.com/llvm/llvm-project/issues/54216
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: amdgcn-amd-amdhsa
-// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
-// UNSUPPORTED: powerpc64le-ibm-linux-gnu
-// UNSUPPORTED: powerpc64le-ibm-linux-gnu-LTO
-// UNSUPPORTED: powerpc64-ibm-linux-gnu
-// UNSUPPORTED: powerpc64-ibm-linux-gnu-LTO
-// UNSUPPORTED: x86_64-pc-linux-gnu
-// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
-// UNSUPPORTED: nvptx64-nvidia-cuda
-// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
-
#include
#include
#include