diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -167,10 +167,21 @@ uint64_t getMapEntryRefCnt(void *HstPtrBegin); LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); - void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - map_var_info_t HstPtrName, bool &IsNew, - bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount, - bool HasCloseModifier, bool HasPresentModifier); + /// Get the target pointer based on host pointer begin and base. If the + /// mapping already exists, the target pointer will be returned directly. In + /// addition, if \p MoveData is true, the memory region pointed by \p + /// HstPtrBegin of size \p Size will also be transferred to the device. If the + /// mapping doesn't exist, and if unified memory is not enabled, a new mapping + /// will be created and the data will also be transferred accordingly. nullptr + /// will be returned because of any of following reasons: + /// - Data allocation failed; + /// - The user tried to do an illegal mapping; + /// - Data transfer issue fails. + void *getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, + map_var_info_t HstPtrName, bool &IsHostPtr, + bool MoveData, bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, + AsyncInfoTy &AsyncInfo); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool &IsHostPtr, diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include DeviceTy::DeviceTy(const DeviceTy &D) @@ -187,53 +188,49 @@ return lr; } -// Used by targetDataBegin -// Return the target pointer begin (where the data will be moved). -// Allocate memory if this is the first occurrence of this mapping. -// Increment the reference counter. -// If NULL is returned, then either data allocation failed or the user tried -// to do an illegal mapping. -void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, +void *DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, map_var_info_t HstPtrName, - bool &IsNew, bool &IsHostPtr, bool IsImplicit, - bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier) { - void *rc = NULL; + bool &IsHostPtr, bool MoveData, + bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, + AsyncInfoTy &AsyncInfo) { + void *TargetPointer = nullptr; + IsHostPtr = false; - IsNew = false; - DataMapMtx.lock(); - LookupResult lr = lookupMapping(HstPtrBegin, Size); + + std::lock_guard LG(DataMapMtx); + + LookupResult LR = lookupMapping(HstPtrBegin, Size); // Check if the pointer is contained. // If a variable is mapped to the device manually by the user - which would // lead to the IsContained flag to be true - then we must ensure that the // device address is returned even under unified memory conditions. - if (lr.Flags.IsContained || - ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) { - auto &HT = *lr.Entry; - IsNew = false; + if (LR.Flags.IsContained || + ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) { + auto &HT = *LR.Entry; if (UpdateRefCount) HT.incRefCount(); - uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + uintptr_t PV = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID, "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " "Size=%" PRId64 ",%s RefCount=%s, Name=%s\n", - (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(tp), + (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(PV), Size, (UpdateRefCount ? " updated" : ""), HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str(), (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); - rc = (void *)tp; - } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) { + TargetPointer = (void *)PV; + } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) { // Explicit extension of mapped data - not allowed. MESSAGE("explicit extension not allowed: host address specified is " DPxMOD " (%" PRId64 " bytes), but device allocation maps to host at " DPxMOD " (%" PRId64 " bytes)", - DPxPTR(HstPtrBegin), Size, DPxPTR(lr.Entry->HstPtrBegin), - lr.Entry->HstPtrEnd - lr.Entry->HstPtrBegin); + DPxPTR(HstPtrBegin), Size, DPxPTR(LR.Entry->HstPtrBegin), + LR.Entry->HstPtrEnd - LR.Entry->HstPtrBegin); if (HasPresentModifier) MESSAGE("device mapping required by 'present' map type modifier does not " "exist for host address " DPxMOD " (%" PRId64 " bytes)", @@ -250,8 +247,10 @@ DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " RefCount=%s\n", DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : "")); + TargetPointer = HstPtrBegin; IsHostPtr = true; - rc = HstPtrBegin; + // We don't need to copy data in this case + MoveData = false; } } else if (HasPresentModifier) { DP("Mapping required by 'present' map type modifier does not exist for " @@ -262,21 +261,34 @@ DPxPTR(HstPtrBegin), Size); } else if (Size) { // If it is not contained and Size > 0, we should create a new entry for it. - IsNew = true; - uintptr_t tp = (uintptr_t)allocData(Size, HstPtrBegin); + MoveData = true; + uintptr_t PV = (uintptr_t)allocData(Size, HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Creating new map entry with " "HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, Name=%s\n", - DPxPTR(HstPtrBegin), DPxPTR(tp), Size, + DPxPTR(HstPtrBegin), DPxPTR(PV), Size, (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); - HostDataToTargetMap.emplace( - HostDataToTargetTy((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, - (uintptr_t)HstPtrBegin + Size, tp, HstPtrName)); - rc = (void *)tp; + HostDataToTargetMap.emplace((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, + (uintptr_t)HstPtrBegin + Size, PV, HstPtrName); + TargetPointer = (void *)PV; } - DataMapMtx.unlock(); - return rc; + // If the target pointer is valid, and we need to transfer data, issue the + // data transfer. + if (TargetPointer && MoveData) { + DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size, + DPxPTR(HstPtrBegin), DPxPTR(TargetPointer)); + int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Copying data to device failed.\n"); + // We will also return nullptr if the data movement fails because that + // pointer points to a corrupted memory region so it doesn't make any + // sense to continue to use it. + return nullptr; + } + } + + return TargetPointer; } // Used by targetDataBegin, targetDataEnd, targetDataUpdate and target. diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -458,7 +458,6 @@ // Address of pointer on the host and device, respectively. void *Pointer_HstPtrBegin, *PointerTgtPtrBegin; - bool IsNew, Pointer_IsNew; bool IsHostPtr = false; bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; // Force the creation of a device side copy of the data when: @@ -487,20 +486,18 @@ // entry for a global that might not already be allocated by the time the // PTR_AND_OBJ entry is handled below, and so the allocation might fail // when HasPresentModifier. - PointerTgtPtrBegin = Device.getOrAllocTgtPtr( - HstPtrBase, HstPtrBase, sizeof(void *), nullptr, Pointer_IsNew, - IsHostPtr, IsImplicit, UpdateRef, HasCloseModifier, - HasPresentModifier); + PointerTgtPtrBegin = Device.getTargetPointer( + HstPtrBase, HstPtrBase, sizeof(void *), nullptr, IsHostPtr, + false /* MoveData */, IsImplicit, UpdateRef, HasCloseModifier, + HasPresentModifier, AsyncInfo); if (!PointerTgtPtrBegin) { REPORT("Call to getOrAllocTgtPtr returned null pointer (%s).\n", HasPresentModifier ? "'present' map type modifier" : "device failure or illegal mapping"); return OFFLOAD_FAIL; } - DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" - "\n", - sizeof(void *), DPxPTR(PointerTgtPtrBegin), - (Pointer_IsNew ? "" : " not")); + DP("There are %zu bytes allocated at target address " DPxMOD "\n", + sizeof(void *), DPxPTR(PointerTgtPtrBegin)); Pointer_HstPtrBegin = HstPtrBase; // modify current entry. HstPtrBase = *(void **)HstPtrBase; @@ -510,9 +507,29 @@ (!FromMapper || i != 0); // subsequently update ref count of pointee } - void *TgtPtrBegin = Device.getOrAllocTgtPtr( - HstPtrBegin, HstPtrBase, data_size, HstPtrName, IsNew, IsHostPtr, - IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier); + bool MoveData = false; + if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { + if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + HasCloseModifier) { + if (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS) + MoveData = true; + else if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && + !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { + // Copy data only if the "parent" struct has RefCount==1. + // If this is a PTR_AND_OBJ entry, the OBJ is not part of the struct, + // so exclude it from this check. + auto ParentIdx = getParentIndex(arg_types[i]); + auto ParentRC = Device.getMapEntryRefCnt(args[ParentIdx]); + assert(ParentRC > 0 && "parent struct not found"); + if (ParentRC == 1) + MoveData = true; + } + } + } + + void *TgtPtrBegin = Device.getTargetPointer( + HstPtrBegin, HstPtrBase, data_size, HstPtrName, IsHostPtr, MoveData, IsImplicit, + UpdateRef, HasCloseModifier, HasPresentModifier, AsyncInfo); // If data_size==0, then the argument could be a zero-length pointer to // NULL, so getOrAlloc() returning NULL is not an error. if (!TgtPtrBegin && (data_size || HasPresentModifier)) { @@ -521,9 +538,8 @@ : "device failure or illegal mapping"); return OFFLOAD_FAIL; } - DP("There are %" PRId64 " bytes allocated at target address " DPxMOD - " - is%s new\n", - data_size, DPxPTR(TgtPtrBegin), (IsNew ? "" : " not")); + DP("There are %" PRId64 " bytes allocated at target address " DPxMOD "\n", + data_size, DPxPTR(TgtPtrBegin)); if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) { uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase; @@ -532,38 +548,6 @@ args_base[i] = TgtPtrBase; } - if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { - bool copy = false; - if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || - HasCloseModifier) { - if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { - copy = true; - } else if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && - !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { - // Copy data only if the "parent" struct has RefCount==1. - // If this is a PTR_AND_OBJ entry, the OBJ is not part of the struct, - // so exclude it from this check. - int32_t parent_idx = getParentIndex(arg_types[i]); - uint64_t parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); - assert(parent_rc > 0 && "parent struct not found"); - if (parent_rc == 1) { - copy = true; - } - } - } - - if (copy && !IsHostPtr) { - DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", - data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - int rt = - Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo); - if (rt != OFFLOAD_SUCCESS) { - REPORT("Copying data to device failed.\n"); - return OFFLOAD_FAIL; - } - } - } - if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));