diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -53,12 +53,20 @@ /// use mutable to allow modification via std::set iterator which is const. mutable uint64_t RefCount; static const uint64_t INFRefCount = ~(uint64_t)0; + /// This mutex will be locked when data movement is issued. For targets that + /// doesn't support async data movement, this mutex can guarantee that after + /// it is released, memory region on the target is update to date. For targets + /// that support async data movement, this can guarantee that data movement + /// has been issued. This mutex *must* be locked right before releasing the + /// mapping table lock. + std::shared_ptr UpdateMtx; public: HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB, map_var_info_t Name = nullptr, bool IsINF = false) : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name), - TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1) {} + TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1), + UpdateMtx(std::make_shared()) {} uint64_t getRefCount() const { return RefCount; } @@ -100,6 +108,10 @@ return !isRefCountInf(); return getRefCount() == 1; } + + void lock() const { UpdateMtx->lock(); } + + void unlock() const { UpdateMtx->unlock(); } }; typedef uintptr_t HstPtrBeginTy; @@ -195,12 +207,21 @@ bool isDataExchangable(const DeviceTy &DstDevice); LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); - TargetPointerResultTy getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, - int64_t Size, - map_var_info_t HstPtrName, - bool IsImplicit, bool UpdateRefCount, - bool HasCloseModifier, - bool HasPresentModifier); + /// Get the target pointer based on host pointer begin and base. If the + /// mapping already exists, the target pointer will be returned directly. In + /// addition, if \p MoveData is true, the memory region pointed by \p + /// HstPtrBegin of size \p Size will also be transferred to the device. If the + /// mapping doesn't exist, and if unified memory is not enabled, a new mapping + /// will be created and the data will also be transferred accordingly. nullptr + /// will be returned because of any of following reasons: + /// - Data allocation failed; + /// - The user tried to do an illegal mapping; + /// - Data transfer issue fails. + TargetPointerResultTy + getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, + map_var_info_t HstPtrName, bool MoveData, bool IsImplicit, + bool UpdateRefCount, bool HasCloseModifier, + bool HasPresentModifier, AsyncInfoTy &AsyncInfo); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool &IsHostPtr, diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -165,26 +165,18 @@ return lr; } -// Used by targetDataBegin -// Return a struct containing target pointer begin (where the data will be -// moved). -// Allocate memory if this is the first occurrence of this mapping. -// Increment the reference counter. -// If the target pointer is NULL, then either data allocation failed or the user -// tried to do an illegal mapping. -// The returned struct also returns an iterator to the map table entry -// corresponding to the host pointer (if exists), and two flags indicating -// whether the entry is just created, and if the target pointer included is -// actually a host pointer (when unified memory enabled). TargetPointerResultTy -DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - map_var_info_t HstPtrName, bool IsImplicit, - bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier) { - void *TargetPointer = NULL; - bool IsNew = false; +DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, + map_var_info_t HstPtrName, bool MoveData, + bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, + AsyncInfoTy &AsyncInfo) { + void *TargetPointer = nullptr; bool IsHostPtr = false; + bool IsNew = false; + DataMapMtx.lock(); + LookupResult LR = lookupMapping(HstPtrBegin, Size); auto Entry = LR.Entry; @@ -249,6 +241,7 @@ } else if (Size) { // If it is not contained and Size > 0, we should create a new entry for it. IsNew = true; + MoveData = true; uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin); Entry = HostDataToTargetMap .emplace((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, @@ -263,7 +256,35 @@ TargetPointer = (void *)Ptr; } - DataMapMtx.unlock(); + // If the target pointer is valid, and we need to transfer data, issue the + // data transfer. + if (TargetPointer && MoveData) { + // Lock the entry before releasing the mapping table lock such that another + // thread that could issue data movement will get the right result. + Entry->lock(); + // Release the mapping table lock right after the entry is locked. + DataMapMtx.unlock(); + + DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size, + DPxPTR(HstPtrBegin), DPxPTR(TargetPointer)); + + int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo); + + // Unlock the entry immediately after the data movement is issued. + Entry->unlock(); + + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Copying data to device failed.\n"); + // We will also return nullptr if the data movement fails because that + // pointer points to a corrupted memory region so it doesn't make any + // sense to continue to use it. + TargetPointer = nullptr; + } + } else { + // Release the mapping table lock directly. + DataMapMtx.unlock(); + } + return {{IsNew, IsHostPtr}, Entry, TargetPointer}; } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -487,9 +487,9 @@ // entry for a global that might not already be allocated by the time the // PTR_AND_OBJ entry is handled below, and so the allocation might fail // when HasPresentModifier. - Pointer_TPR = Device.getOrAllocTgtPtr( - HstPtrBase, HstPtrBase, sizeof(void *), nullptr, IsImplicit, - UpdateRef, HasCloseModifier, HasPresentModifier); + Pointer_TPR = Device.getTargetPointer( + HstPtrBase, HstPtrBase, sizeof(void *), nullptr, false, IsImplicit, + UpdateRef, HasCloseModifier, HasPresentModifier, AsyncInfo); PointerTgtPtrBegin = Pointer_TPR.TargetPointer; IsHostPtr = Pointer_TPR.Flags.IsHostPointer; if (!PointerTgtPtrBegin) { @@ -511,9 +511,16 @@ (!FromMapper || i != 0); // subsequently update ref count of pointee } - auto TPR = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, data_size, - HstPtrName, IsImplicit, UpdateRef, - HasCloseModifier, HasPresentModifier); + bool MoveData = false; + if (arg_types[i] & OMP_TGT_MAPTYPE_TO) + if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + HasCloseModifier) + if (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS) + MoveData = true; + + auto TPR = Device.getTargetPointer( + HstPtrBegin, HstPtrBase, data_size, HstPtrName, MoveData, IsImplicit, + UpdateRef, HasCloseModifier, HasPresentModifier, AsyncInfo); void *TgtPtrBegin = TPR.TargetPointer; IsHostPtr = TPR.Flags.IsHostPointer; // If data_size==0, then the argument could be a zero-length pointer to @@ -535,26 +542,6 @@ args_base[i] = TgtPtrBase; } - if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { - bool copy = false; - if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || - HasCloseModifier) { - if (TPR.Flags.IsNewEntry || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) - copy = true; - } - - if (copy && !IsHostPtr) { - DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", - data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - int rt = - Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo); - if (rt != OFFLOAD_SUCCESS) { - REPORT("Copying data to device failed.\n"); - return OFFLOAD_FAIL; - } - } - } - if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { // Check whether we need to update the pointer on the device bool UpdateDevPtr = false;