diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -57,7 +57,8 @@ struct StatesTy { StatesTy(uint64_t DRC, uint64_t HRC) - : DynRefCount(DRC), HoldRefCount(HRC) {} + : DynRefCount(DRC), HoldRefCount(HRC), + MayContainAttachedPointers(false) {} /// The dynamic reference count is the standard reference count as of OpenMP /// 4.5. The hold reference count is an OpenMP extension for the sake of /// OpenACC support. @@ -75,6 +76,11 @@ /// uint64_t DynRefCount; uint64_t HoldRefCount; + + /// Flag recording whether any subpart of the mapped region might be an + /// attached pointer; starts false, setMayContainAttachedPointers() sets it. + bool MayContainAttachedPointers; + /// This mutex will be locked when data movement is issued. For targets that /// doesn't support async data movement, this mutex can guarantee that after /// it is released, memory region on the target is update to date. For @@ -181,6 +187,13 @@ void lock() const { States->UpdateMtx.lock(); } void unlock() const { States->UpdateMtx.unlock(); } + + void setMayContainAttachedPointers() const { + States->MayContainAttachedPointers = true; + } + bool getMayContainAttachedPointers() const { + return States->MayContainAttachedPointers; + } }; typedef uintptr_t HstPtrBeginTy; @@ -293,10 +306,11 @@ bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); - void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount, bool UseHoldRefCount, - bool &IsHostPtr, bool MustContain = false, - bool ForceDelete = false); + TargetPointerResultTy getTgtPtrBegin(void *HstPtrBegin, int64_t Size, + bool &IsLast, bool UpdateRefCount, + bool UseHoldRefCount, bool &IsHostPtr, + bool MustContain = false, + bool ForceDelete = false); /// For the map entry for \p HstPtrBegin, decrement the reference count /// specified by \p 
HasHoldModifier and, if the the total reference count is /// then zero, deallocate the corresponding device storage and remove the map diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -107,10 +107,11 @@ DeviceTy &Device = *PM->Devices[device_num]; bool IsLast; // not used bool IsHostPtr; - void *TgtPtr = Device.getTgtPtrBegin(const_cast(ptr), 0, IsLast, - /*UpdateRefCount=*/false, - /*UseHoldRefCount=*/false, IsHostPtr); - int rc = (TgtPtr != NULL); + TargetPointerResultTy TPR = + Device.getTgtPtrBegin(const_cast(ptr), 0, IsLast, + /*UpdateRefCount=*/false, + /*UseHoldRefCount=*/false, IsHostPtr); + int rc = (TPR.TargetPointer != NULL); // Under unified memory the host pointer can be returned by the // getTgtPtrBegin() function which means that there is no device // corresponding point for ptr. This function should return false diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -288,11 +288,12 @@ // Used by targetDataBegin, targetDataEnd, targetDataUpdate and target. // Return the target pointer begin (where the data will be moved). // Decrement the reference counter if called from targetDataEnd. 
-void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount, bool UseHoldRefCount, - bool &IsHostPtr, bool MustContain, - bool ForceDelete) { - void *rc = NULL; +TargetPointerResultTy +DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, + bool UpdateRefCount, bool UseHoldRefCount, + bool &IsHostPtr, bool MustContain, bool ForceDelete) { + void *TargetPointer = NULL; + bool IsNew = false; IsHostPtr = false; IsLast = false; DataMapMtx.lock(); @@ -334,7 +335,7 @@ "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(), DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction); - rc = (void *)tp; + TargetPointer = (void *)tp; } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { // If the value isn't found in the mapping and unified shared memory // is on then it means we have stumbled upon a value which we need to @@ -343,11 +344,11 @@ "memory\n", DPxPTR((uintptr_t)HstPtrBegin), Size); IsHostPtr = true; - rc = HstPtrBegin; + TargetPointer = HstPtrBegin; } DataMapMtx.unlock(); - return rc; + return {{IsNew, IsHostPtr}, lr.Entry, TargetPointer}; } // Return the target pointer begin (where the data will be moved). 
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -17,6 +17,7 @@
 #include "rtl.h"
 
 #include <cassert>
+#include <cstdint>
 #include <vector>
 
 int AsyncInfoTy::synchronize() {
@@ -567,6 +568,9 @@
         // create or update shadow pointers for this entry
         Device.ShadowPtrMap[Pointer_HstPtrBegin] = {
             HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
+        // NOTE(review): this shadow entry is keyed by Pointer_HstPtrBegin;
+        // confirm TPR is the map entry whose host range covers that address.
+        TPR.MapTableEntry->setMayContainAttachedPointers();
         UpdateDevPtr = true;
       }
 
@@ -610,6 +614,50 @@
   DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasHoldModifier)
       : HstPtrBegin(HstPtr), DataSize(Size), HasHoldModifier(HasHoldModifier) {}
 };
+
+/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
+/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and
+/// must itself move the passed iterator forward (by incrementing it or by
+/// assigning it the result of an erase); the loop below does not advance it
+/// for entries handed to \p CB. If the callback returns OFFLOAD_FAIL the rest
+/// of the map is not checked anymore.
+template <typename CBTy>
+static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
+                                    uintptr_t Size,
+                                    const TargetPointerResultTy &TPR) {
+  // If we have an object that is too small to hold a pointer subobject, no need
+  // to do any checking.
+  if (Size < sizeof(void *))
+    return;
+
+  // If the map entry for the object was never marked as containing attached
+  // pointers, no need to do any checking.
+  if (!TPR.MapTableEntry->getMayContainAttachedPointers())
+    return;
+
+  uintptr_t LB = (uintptr_t)Begin;
+  uintptr_t UB = LB + Size;
+  // Now we are looking into the shadow map so we need to lock it.
+  Device.ShadowMtx.lock();
+  for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
+       Itr != Device.ShadowPtrMap.end();) {
+    uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first;
+
+    // An STL map is sorted on its keys; use this property
+    // to quickly determine when to break out of the loop.
+    if (ShadowHstPtrAddr < LB) {
+      ++Itr;
+      continue;
+    }
+    if (ShadowHstPtrAddr >= UB)
+      break;
+
+    if (CB(Itr) == OFFLOAD_FAIL)
+      break;
+  }
+  Device.ShadowMtx.unlock();
+}
+
 } // namespace
 
 /// Internal function to undo the mapping and retrieve the data from the device.
@@ -677,9 +725,10 @@
     bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
 
     // If PTR_AND_OBJ, HstPtrBegin is address of pointee
-    void *TgtPtrBegin = Device.getTgtPtrBegin(
+    TargetPointerResultTy TPR = Device.getTgtPtrBegin(
         HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,
         !IsImplicit, ForceDelete);
+    void *TgtPtrBegin = TPR.TargetPointer;
     if (!TgtPtrBegin && (DataSize || HasPresentModifier)) {
       DP("Mapping does not exist (%s)\n",
          (HasPresentModifier ? "'present' map type modifier" : "ignored"));
@@ -752,27 +801,13 @@
       // need to restore the original host pointer values from their shadow
       // copies. If the struct is going to be deallocated, remove any remaining
       // shadow pointer entries for this struct.
-      uintptr_t LB = (uintptr_t)HstPtrBegin;
-      uintptr_t UB = (uintptr_t)HstPtrBegin + DataSize;
-      Device.ShadowMtx.lock();
-      for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
-           Itr != Device.ShadowPtrMap.end();) {
+      auto CB = [&](ShadowPtrListTy::iterator &Itr) {
         void **ShadowHstPtrAddr = (void **)Itr->first;
-
-        // An STL map is sorted on its keys; use this property
-        // to quickly determine when to break out of the loop.
-        if ((uintptr_t)ShadowHstPtrAddr < LB) {
-          ++Itr;
-          continue;
-        }
-        if ((uintptr_t)ShadowHstPtrAddr >= UB)
-          break;
-
         // If we copied the struct to the host, we need to restore the pointer.
         if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
           DP("Restoring original host pointer value " DPxMOD " for host "
              "pointer " DPxMOD "\n",
              DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
           *ShadowHstPtrAddr = Itr->second.HstPtrVal;
         }
         // If the struct is to be deallocated, remove the shadow entry.
@@ -782,8 +817,9 @@
         } else {
           ++Itr;
         }
-      }
-      Device.ShadowMtx.unlock();
+        return OFFLOAD_SUCCESS;
+      };
+      applyToShadowMapEntries(Device, CB, HstPtrBegin, DataSize, TPR);
 
       // Add pointer to the buffer for later deallocation
       if (DelEntry && !IsHostPtr)
@@ -819,9 +855,10 @@
                                 int64_t ArgType, AsyncInfoTy &AsyncInfo) {
   TIMESCOPE_WITH_IDENT(loc);
   bool IsLast, IsHostPtr;
-  void *TgtPtrBegin = Device.getTgtPtrBegin(
+  TargetPointerResultTy TPR = Device.getTgtPtrBegin(
       HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false,
       /*UseHoldRefCount=*/false, IsHostPtr, /*MustContain=*/true);
+  void *TgtPtrBegin = TPR.TargetPointer;
   if (!TgtPtrBegin) {
     DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
     if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
@@ -848,22 +885,17 @@
       return OFFLOAD_FAIL;
     }
 
-    uintptr_t LB = (uintptr_t)HstPtrBegin;
-    uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
-    Device.ShadowMtx.lock();
-    for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
-         IT != Device.ShadowPtrMap.end(); ++IT) {
-      void **ShadowHstPtrAddr = (void **)IT->first;
-      if ((uintptr_t)ShadowHstPtrAddr < LB)
-        continue;
-      if ((uintptr_t)ShadowHstPtrAddr >= UB)
-        break;
+    auto CB = [&](ShadowPtrListTy::iterator &Itr) {
+      void **ShadowHstPtrAddr = (void **)Itr->first;
       DP("Restoring original host pointer value " DPxMOD
          " for host pointer " DPxMOD "\n",
-         DPxPTR(IT->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
-      *ShadowHstPtrAddr = IT->second.HstPtrVal;
-    }
-    Device.ShadowMtx.unlock();
+         DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
+      *ShadowHstPtrAddr = Itr->second.HstPtrVal;
+      // applyToShadowMapEntries leaves advancing the iterator to the callback.
+      ++Itr;
+      return OFFLOAD_SUCCESS;
+    };
+    applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
   }
 
   if (ArgType & OMP_TGT_MAPTYPE_TO) {
@@ -875,28 +907,24 @@
       return OFFLOAD_FAIL;
     }
 
-    uintptr_t LB = (uintptr_t)HstPtrBegin;
-    uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
-    Device.ShadowMtx.lock();
-    for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
-         IT != Device.ShadowPtrMap.end(); ++IT) {
-      void **ShadowHstPtrAddr = (void **)IT->first;
-      if ((uintptr_t)ShadowHstPtrAddr < LB)
-        continue;
-      if ((uintptr_t)ShadowHstPtrAddr >= UB)
-        break;
+    auto CB = [&](ShadowPtrListTy::iterator &Itr) {
       DP("Restoring original target pointer value " DPxMOD " for target "
          "pointer " DPxMOD "\n",
-         DPxPTR(IT->second.TgtPtrVal), DPxPTR(IT->second.TgtPtrAddr));
-      Ret = Device.submitData(IT->second.TgtPtrAddr, &IT->second.TgtPtrVal,
+         DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr));
+      Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal,
                               sizeof(void *), AsyncInfo);
       if (Ret != OFFLOAD_SUCCESS) {
         REPORT("Copying data to device failed.\n");
-        Device.ShadowMtx.unlock();
         return OFFLOAD_FAIL;
       }
-    }
-    Device.ShadowMtx.unlock();
+      // applyToShadowMapEntries leaves advancing the iterator to the callback.
+      ++Itr;
+      return OFFLOAD_SUCCESS;
+    };
+    applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
+    // A failed pointer update stops the traversal; report it to the caller.
+    if (Ret != OFFLOAD_SUCCESS)
+      return OFFLOAD_FAIL;
   }
   return OFFLOAD_SUCCESS;
 }
@@ -1281,9 +1309,10 @@
         uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
         void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
         void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
-        PointerTgtPtrBegin = Device.getTgtPtrBegin(
+        TargetPointerResultTy TPR = Device.getTgtPtrBegin(
             HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false,
             /*UseHoldRefCount=*/false, IsHostPtr);
+        PointerTgtPtrBegin = TPR.TargetPointer;
         if (!PointerTgtPtrBegin) {
           DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
              DPxPTR(HstPtrVal));
@@ -1312,6 +1341,7 @@
     map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
     ptrdiff_t TgtBaseOffset;
     bool IsLast, IsHostPtr; // unused.
+    TargetPointerResultTy TPR;
     if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
       DP("Forwarding first-private value " DPxMOD " to the target construct\n",
          DPxPTR(HstPtrBase));
@@ -1337,9 +1367,10 @@
     } else {
       if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
         HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
-      TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
-                                          /*UpdateRefCount=*/false,
-                                          /*UseHoldRefCount=*/false, IsHostPtr);
+      TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
+                                  /*UpdateRefCount=*/false,
+                                  /*UseHoldRefCount=*/false, IsHostPtr);
+      TgtPtrBegin = TPR.TargetPointer;
       TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
 #ifdef OMPTARGET_DEBUG
       void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);