Diff 401489

openmp/libomptarget/include/device.h

Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
private:		private:
static const uint64_t INFRefCount = ~(uint64_t)0;		static const uint64_t INFRefCount = ~(uint64_t)0;
static std::string refCountToStr(uint64_t RefCount) {		static std::string refCountToStr(uint64_t RefCount) {
return RefCount == INFRefCount ? "INF" : std::to_string(RefCount);		return RefCount == INFRefCount ? "INF" : std::to_string(RefCount);
}		}

struct StatesTy {		struct StatesTy {
StatesTy(uint64_t DRC, uint64_t HRC)		StatesTy(uint64_t DRC, uint64_t HRC)
: DynRefCount(DRC), HoldRefCount(HRC) {}		: DynRefCount(DRC), HoldRefCount(HRC),
		MayContainAttachedPointers(false) {}
/// The dynamic reference count is the standard reference count as of OpenMP		/// The dynamic reference count is the standard reference count as of OpenMP
/// 4.5. The hold reference count is an OpenMP extension for the sake of		/// 4.5. The hold reference count is an OpenMP extension for the sake of
/// OpenACC support.		/// OpenACC support.
///		///
/// The 'ompx_hold' map type modifier is permitted only on "omp target" and		/// The 'ompx_hold' map type modifier is permitted only on "omp target" and
/// "omp target data", and "delete" is permitted only on "omp target exit		/// "omp target data", and "delete" is permitted only on "omp target exit
/// data" and associated runtime library routines. As a result, we really		/// data" and associated runtime library routines. As a result, we really
/// need to implement "reset" functionality only for the dynamic reference		/// need to implement "reset" functionality only for the dynamic reference
/// counter. Likewise, only the dynamic reference count can be infinite		/// counter. Likewise, only the dynamic reference count can be infinite
/// because, for example, omp_target_associate_ptr and "omp declare target		/// because, for example, omp_target_associate_ptr and "omp declare target
/// link" operate only on it. Nevertheless, it's actually easier to follow		/// link" operate only on it. Nevertheless, it's actually easier to follow
/// the code (and requires less assertions for special cases) when we just		/// the code (and requires less assertions for special cases) when we just
/// implement these features generally across both reference counters here.		/// implement these features generally across both reference counters here.
/// Thus, it's the users of this class that impose those restrictions.		/// Thus, it's the users of this class that impose those restrictions.
///		///
uint64_t DynRefCount;		uint64_t DynRefCount;
uint64_t HoldRefCount;		uint64_t HoldRefCount;

		/// Boolean flag to remember if any subpart of the mapped region might be
		/// an attached pointer.
		bool MayContainAttachedPointers;

/// This mutex will be locked when data movement is issued. For targets that		/// This mutex will be locked when data movement is issued. For targets that
/// don't support async data movement, this mutex can guarantee that after		/// don't support async data movement, this mutex can guarantee that after
/// it is released, memory region on the target is up to date. For		/// it is released, memory region on the target is up to date. For
/// targets that support async data movement, this can guarantee that data		/// targets that support async data movement, this can guarantee that data
/// movement has been issued. This mutex must be locked right before		/// movement has been issued. This mutex must be locked right before
/// releasing the mapping table lock.		/// releasing the mapping table lock.
std::mutex UpdateMtx;		std::mutex UpdateMtx;
/// Pointer to the event corresponding to the data update of this map.		/// Pointer to the event corresponding to the data update of this map.
▲ Show 20 Lines • Show All 105 Lines • ▼ Show 20 Lines	uint64_t OtherRefCount =
UseHoldRefCount ? States->DynRefCount : States->HoldRefCount;		UseHoldRefCount ? States->DynRefCount : States->HoldRefCount;
if (OtherRefCount > 0)		if (OtherRefCount > 0)
return false;		return false;
if (AfterReset)		if (AfterReset)
return ThisRefCount != INFRefCount;		return ThisRefCount != INFRefCount;
return ThisRefCount == 1;		return ThisRefCount == 1;
}		}

		/// Flag this entry as possibly containing attached pointers by setting
		/// MayContainAttachedPointers on the object that States points to. The
		/// method is const; the write goes through the States pointer.
		void setMayContainAttachedPointers() const {
		States->MayContainAttachedPointers = true;
		}
		/// Return the MayContainAttachedPointers flag read through States, i.e.
		/// whether this entry has been flagged as possibly containing attached
		/// pointers.
		bool getMayContainAttachedPointers() const {
		return States->MayContainAttachedPointers;
		}

/// Helper to make sure the entry is locked in a scope.		/// Helper to make sure the entry is locked in a scope.
/// TODO: We should generalize this and use it for all our objects that use		/// TODO: We should generalize this and use it for all our objects that use
/// lock/unlock methods.		/// lock/unlock methods.
struct LockGuard {		struct LockGuard {
const HostDataToTargetTy &Entry;		const HostDataToTargetTy &Entry;

public:		public:
LockGuard(const HostDataToTargetTy &Entry) : Entry(Entry) { Entry.lock(); }		LockGuard(const HostDataToTargetTy &Entry) : Entry(Entry) { Entry.lock(); }
▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines	struct DeviceTy {
/// - Data transfer issue fails.		/// - Data transfer issue fails.
TargetPointerResultTy		TargetPointerResultTy
getTargetPointer(void HstPtrBegin, void HstPtrBase, int64_t Size,		getTargetPointer(void HstPtrBegin, void HstPtrBase, int64_t Size,
map_var_info_t HstPtrName, bool HasFlagTo,		map_var_info_t HstPtrName, bool HasFlagTo,
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,		bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier,		bool HasCloseModifier, bool HasPresentModifier,
bool HasHoldModifier, AsyncInfoTy &AsyncInfo);		bool HasHoldModifier, AsyncInfoTy &AsyncInfo);
void getTgtPtrBegin(void HstPtrBegin, int64_t Size);		void getTgtPtrBegin(void HstPtrBegin, int64_t Size);
void getTgtPtrBegin(void HstPtrBegin, int64_t Size, bool &IsLast,		TargetPointerResultTy getTgtPtrBegin(void *HstPtrBegin, int64_t Size,
bool UpdateRefCount, bool UseHoldRefCount,		bool &IsLast, bool UpdateRefCount,
bool &IsHostPtr, bool MustContain = false,		bool UseHoldRefCount, bool &IsHostPtr,
		bool MustContain = false,
bool ForceDelete = false);		bool ForceDelete = false);
/// For the map entry for \p HstPtrBegin, decrement the reference count		/// For the map entry for \p HstPtrBegin, decrement the reference count
/// specified by \p HasHoldModifier and, if the total reference count is		/// specified by \p HasHoldModifier and, if the total reference count is
/// then zero, deallocate the corresponding device storage and remove the map		/// then zero, deallocate the corresponding device storage and remove the map
/// entry. Return \c OFFLOAD_SUCCESS if the map entry existed, and return		/// entry. Return \c OFFLOAD_SUCCESS if the map entry existed, and return
/// \c OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling		/// \c OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling
/// this function if the map entry is not expected to exist because		/// this function if the map entry is not expected to exist because
/// \p HstPtrBegin uses shared memory.		/// \p HstPtrBegin uses shared memory.
int deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool HasHoldModifier);		int deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool HasHoldModifier);
▲ Show 20 Lines • Show All 112 Lines • Show Last 20 Lines

openmp/libomptarget/src/api.cpp

Show First 20 Lines • Show All 101 Lines • ▼ Show 20 Lines	if (DevicesSize <= (size_t)device_num) {
DP("Call to omp_target_is_present with invalid device ID, returning "		DP("Call to omp_target_is_present with invalid device ID, returning "
"false\n");		"false\n");
return false;		return false;
}		}

DeviceTy &Device = *PM->Devices[device_num];		DeviceTy &Device = *PM->Devices[device_num];
bool IsLast; // not used		bool IsLast; // not used
bool IsHostPtr;		bool IsHostPtr;
void TgtPtr = Device.getTgtPtrBegin(const_cast<void >(ptr), 0, IsLast,		TargetPointerResultTy TPR =
		Device.getTgtPtrBegin(const_cast<void *>(ptr), 0, IsLast,
/UpdateRefCount=/false,		/UpdateRefCount=/false,
/UseHoldRefCount=/false, IsHostPtr);		/UseHoldRefCount=/false, IsHostPtr);
int rc = (TgtPtr != NULL);		int rc = (TPR.TargetPointer != NULL);
// Under unified memory the host pointer can be returned by the		// Under unified memory the host pointer can be returned by the
// getTgtPtrBegin() function which means that there is no device		// getTgtPtrBegin() function which means that there is no device
// corresponding point for ptr. This function should return false		// corresponding point for ptr. This function should return false
// in that situation.		// in that situation.
if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)		if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
rc = !IsHostPtr;		rc = !IsHostPtr;
DP("Call to omp_target_is_present returns %d\n", rc);		DP("Call to omp_target_is_present returns %d\n", rc);
return rc;		return rc;
▲ Show 20 Lines • Show All 199 Lines • Show Last 20 Lines

openmp/libomptarget/src/device.cpp

Show First 20 Lines • Show All 329 Lines • ▼ Show 20 Lines	DeviceTy::getTargetPointer(void HstPtrBegin, void HstPtrBase, int64_t Size,
}		}

return {{IsNew, IsHostPtr}, Entry, TargetPointer};		return {{IsNew, IsHostPtr}, Entry, TargetPointer};
}		}

// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.		// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
// Return the target pointer begin (where the data will be moved).		// Return the target pointer begin (where the data will be moved).
// Decrement the reference counter if called from targetDataEnd.		// Decrement the reference counter if called from targetDataEnd.
void DeviceTy::getTgtPtrBegin(void HstPtrBegin, int64_t Size, bool &IsLast,		TargetPointerResultTy
		DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
bool UpdateRefCount, bool UseHoldRefCount,		bool UpdateRefCount, bool UseHoldRefCount,
bool &IsHostPtr, bool MustContain,		bool &IsHostPtr, bool MustContain, bool ForceDelete) {
bool ForceDelete) {		void *TargetPointer = NULL;
void *rc = NULL;		bool IsNew = false;
		grokos (unsubmitted, not done): Can you add a comment explaining that `IsNew` is always going to be `false` because `getTgtPtrBegin` is called from targetDataEnd, targetDataContiguous and processDataBefore, i.e. for data which is expected to be mapped already? It may also be more elegant to hardcode `false` in the return statement directly, thus avoiding a variable that is never used in the body of this function.
IsHostPtr = false;		IsHostPtr = false;
IsLast = false;		IsLast = false;
DataMapMtx.lock();		DataMapMtx.lock();
LookupResult lr = lookupMapping(HstPtrBegin, Size);		LookupResult lr = lookupMapping(HstPtrBegin, Size);

if (lr.Flags.IsContained \|\|		if (lr.Flags.IsContained \|\|
(!MustContain && (lr.Flags.ExtendsBefore \|\| lr.Flags.ExtendsAfter))) {		(!MustContain && (lr.Flags.ExtendsBefore \|\| lr.Flags.ExtendsAfter))) {
auto &HT = *lr.Entry;		auto &HT = *lr.Entry;
Show All 25 Lines	if (lr.Flags.IsContained \|\|
const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;		const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";		const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";
uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);		uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,		INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
"Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "		"Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
"Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",		"Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(),		DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(),
DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);		DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
rc = (void *)tp;		TargetPointer = (void *)tp;
} else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {		} else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
// If the value isn't found in the mapping and unified shared memory		// If the value isn't found in the mapping and unified shared memory
// is on then it means we have stumbled upon a value which we need to		// is on then it means we have stumbled upon a value which we need to
// use directly from the host.		// use directly from the host.
DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "		DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
"memory\n",		"memory\n",
DPxPTR((uintptr_t)HstPtrBegin), Size);		DPxPTR((uintptr_t)HstPtrBegin), Size);
IsHostPtr = true;		IsHostPtr = true;
rc = HstPtrBegin;		TargetPointer = HstPtrBegin;
}		}

DataMapMtx.unlock();		DataMapMtx.unlock();
return rc;		return {{IsNew, IsHostPtr}, lr.Entry, TargetPointer};
}		}

// Return the target pointer begin (where the data will be moved).		// Return the target pointer begin (where the data will be moved).
// Lock-free version called when loading global symbols from the fat binary.		// Lock-free version called when loading global symbols from the fat binary.
void DeviceTy::getTgtPtrBegin(void HstPtrBegin, int64_t Size) {		void DeviceTy::getTgtPtrBegin(void HstPtrBegin, int64_t Size) {
uintptr_t hp = (uintptr_t)HstPtrBegin;		uintptr_t hp = (uintptr_t)HstPtrBegin;
LookupResult lr = lookupMapping(HstPtrBegin, Size);		LookupResult lr = lookupMapping(HstPtrBegin, Size);
if (lr.Flags.IsContained \|\| lr.Flags.ExtendsBefore \|\| lr.Flags.ExtendsAfter) {		if (lr.Flags.IsContained \|\| lr.Flags.ExtendsBefore \|\| lr.Flags.ExtendsAfter) {
▲ Show 20 Lines • Show All 262 Lines • Show Last 20 Lines

openmp/libomptarget/src/omptarget.cpp

Show All 11 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "omptarget.h"		#include "omptarget.h"
#include "device.h"		#include "device.h"
#include "private.h"		#include "private.h"
#include "rtl.h"		#include "rtl.h"

#include <cassert>		#include <cassert>
		#include <cstdint>
#include <vector>		#include <vector>

int AsyncInfoTy::synchronize() {		int AsyncInfoTy::synchronize() {
int Result = OFFLOAD_SUCCESS;		int Result = OFFLOAD_SUCCESS;
if (AsyncInfo.Queue) {		if (AsyncInfo.Queue) {
// If we have a queue we need to synchronize it now.		// If we have a queue we need to synchronize it now.
Result = Device.synchronize(*this);		Result = Device.synchronize(*this);
assert(AsyncInfo.Queue == nullptr &&		assert(AsyncInfo.Queue == nullptr &&
▲ Show 20 Lines • Show All 484 Lines • ▼ Show 20 Lines	if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
// No need to update pointee ref count for the first element of the		// No need to update pointee ref count for the first element of the
// subelement that comes from mapper.		// subelement that comes from mapper.
UpdateRef =		UpdateRef =
(!FromMapper \|\| i != 0); // subsequently update ref count of pointee		(!FromMapper \|\| i != 0); // subsequently update ref count of pointee
}		}

const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO;		const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO;
const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;		const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
auto TPR = Device.getTargetPointer(HstPtrBegin, HstPtrBase, data_size,		auto TPR = Device.getTargetPointer(
HstPtrName, HasFlagTo, HasFlagAlways,		HstPtrBegin, HstPtrBase, data_size, HstPtrName, HasFlagTo,
IsImplicit, UpdateRef, HasCloseModifier,		HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
HasPresentModifier, HasHoldModifier,		HasPresentModifier, HasHoldModifier, AsyncInfo);
AsyncInfo);
void *TgtPtrBegin = TPR.TargetPointer;		void *TgtPtrBegin = TPR.TargetPointer;
IsHostPtr = TPR.Flags.IsHostPointer;		IsHostPtr = TPR.Flags.IsHostPointer;
// If data_size==0, then the argument could be a zero-length pointer to		// If data_size==0, then the argument could be a zero-length pointer to
// NULL, so getTargetPointer() returning NULL is not an error.		// NULL, so getTargetPointer() returning NULL is not an error.
if (!TgtPtrBegin && (data_size \|\| HasPresentModifier)) {		if (!TgtPtrBegin && (data_size \|\| HasPresentModifier)) {
REPORT("Call to getTargetPointer returned null pointer (%s).\n",		REPORT("Call to getTargetPointer returned null pointer (%s).\n",
HasPresentModifier ? "'present' map type modifier"		HasPresentModifier ? "'present' map type modifier"
: "device failure or illegal mapping");		: "device failure or illegal mapping");
Show All 30 Lines	if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
// OBJ is deallocated and later on allocated again (at a different device		// OBJ is deallocated and later on allocated again (at a different device
// address), ShadowPtrMap still contains an entry for Pointer_HstPtrBegin		// address), ShadowPtrMap still contains an entry for Pointer_HstPtrBegin
// which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ.		// which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ.
if (Entry == Device.ShadowPtrMap.end() \|\|		if (Entry == Device.ShadowPtrMap.end() \|\|
Entry->second.TgtPtrVal != ExpectedTgtPtrBase) {		Entry->second.TgtPtrVal != ExpectedTgtPtrBase) {
// create or update shadow pointers for this entry		// create or update shadow pointers for this entry
Device.ShadowPtrMap[Pointer_HstPtrBegin] = {		Device.ShadowPtrMap[Pointer_HstPtrBegin] = {
HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};		HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
		Pointer_TPR.MapTableEntry->setMayContainAttachedPointers();
UpdateDevPtr = true;		UpdateDevPtr = true;
}		}

if (UpdateDevPtr) {		if (UpdateDevPtr) {
HostDataToTargetTy::LockGuard LG(*Pointer_TPR.MapTableEntry);		HostDataToTargetTy::LockGuard LG(*Pointer_TPR.MapTableEntry);
Device.ShadowMtx.unlock();		Device.ShadowMtx.unlock();

DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",		DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
Show All 28 Lines	struct DeallocTgtPtrInfo {
/// Size of the data		/// Size of the data
int64_t DataSize;		int64_t DataSize;
/// Whether it has \p ompx_hold modifier		/// Whether it has \p ompx_hold modifier
bool HasHoldModifier;		bool HasHoldModifier;

DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasHoldModifier)		DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasHoldModifier)
: HstPtrBegin(HstPtr), DataSize(Size), HasHoldModifier(HasHoldModifier) {}		: HstPtrBegin(HstPtr), DataSize(Size), HasHoldModifier(HasHoldModifier) {}
};		};

		/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
		/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the
		/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the
		grokos (unsubmitted, not done): an be --> can be
		/// rest of the map is not checked anymore.
		///
		/// \p CB receives the current iterator by reference and is responsible for
		/// moving it to the next entry to visit (by incrementing it or by assigning
		/// the iterator returned from an erase); this function never advances the
		/// iterator on its own. Entries are visited only if \p Size is at least
		/// sizeof(void *) and \p TPR refers to a map table entry that has been
		/// flagged as possibly containing attached pointers. The result of \p CB
		/// only decides whether iteration continues; it is not reported to the
		/// caller of this function.
		template <typename CBTy>
		static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
		                                    uintptr_t Size,
		                                    const TargetPointerResultTy &TPR) {
		  // If we have an object that is too small to hold a pointer subobject, no
		  // need to do any checking.
		  if (Size < sizeof(void *))
		    return;

		  // If the map entry for the object was never marked as containing attached
		  // pointers, no need to do any checking.
		  if (TPR.MapTableEntry == HostDataToTargetListTy::iterator{} ||
		      !TPR.MapTableEntry->getMayContainAttachedPointers())
		    return;

		  // One-past-the-end host address of the range [Begin, Begin + Size).
		  const uintptr_t UB = (uintptr_t)Begin + Size;

		  // Now we are looking into the shadow map so we need to lock it.
		  Device.ShadowMtx.lock();

		  // An STL map is sorted on its keys; seek directly to the first entry at or
		  // after Begin instead of walking over every smaller key, and stop at the
		  // first key that is at or beyond the upper bound.
		  // NOTE(review): assumes ShadowPtrListTy is keyed by void * host addresses
		  // whose map order matches their uintptr_t order (the upper-bound check has
		  // always relied on the same property) -- confirm against device.h.
		  ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.lower_bound(Begin);
		  while (Itr != Device.ShadowPtrMap.end() && (uintptr_t)Itr->first < UB) {
		    // CB advances (or erases and replaces) Itr itself.
		    if (CB(Itr) == OFFLOAD_FAIL)
		      break;
		  }

		  Device.ShadowMtx.unlock();
		}

} // namespace		} // namespace

/// Internal function to undo the mapping and retrieve the data from the device.		/// Internal function to undo the mapping and retrieve the data from the device.
int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,		int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
void ArgBases, void Args, int64_t *ArgSizes,		void ArgBases, void Args, int64_t *ArgSizes,
int64_t ArgTypes, map_var_info_t ArgNames,		int64_t ArgTypes, map_var_info_t ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {		void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
int Ret;		int Ret;
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	for (int32_t I = ArgNum - 1; I >= 0; --I) {
bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) \|\|		bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) \|\|
(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&		(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
!(FromMapper && I == 0);		!(FromMapper && I == 0);
bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;		bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;
bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;		bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;		bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;

// If PTR_AND_OBJ, HstPtrBegin is address of pointee		// If PTR_AND_OBJ, HstPtrBegin is address of pointee
void *TgtPtrBegin = Device.getTgtPtrBegin(		TargetPointerResultTy TPR = Device.getTgtPtrBegin(
HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,		HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,
!IsImplicit, ForceDelete);		!IsImplicit, ForceDelete);
		void *TgtPtrBegin = TPR.TargetPointer;
if (!TgtPtrBegin && (DataSize \|\| HasPresentModifier)) {		if (!TgtPtrBegin && (DataSize \|\| HasPresentModifier)) {
DP("Mapping does not exist (%s)\n",		DP("Mapping does not exist (%s)\n",
(HasPresentModifier ? "'present' map type modifier" : "ignored"));		(HasPresentModifier ? "'present' map type modifier" : "ignored"));
if (HasPresentModifier) {		if (HasPresentModifier) {
// OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13:		// OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13:
// "If a map clause appears on a target, target data, target enter data		// "If a map clause appears on a target, target data, target enter data
// or target exit data construct with a present map-type-modifier then		// or target exit data construct with a present map-type-modifier then
// on entry to the region if the corresponding list item does not appear		// on entry to the region if the corresponding list item does not appear
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines	if ((ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) \|\| DelEntry) {
DelEntry = false;		DelEntry = false;
FromMapperBase = HstPtrBegin;		FromMapperBase = HstPtrBegin;
}		}

// If we copied back to the host a struct/array containing pointers, we		// If we copied back to the host a struct/array containing pointers, we
// need to restore the original host pointer values from their shadow		// need to restore the original host pointer values from their shadow
// copies. If the struct is going to be deallocated, remove any remaining		// copies. If the struct is going to be deallocated, remove any remaining
// shadow pointer entries for this struct.		// shadow pointer entries for this struct.
uintptr_t LB = (uintptr_t)HstPtrBegin;		auto CB = [&](ShadowPtrListTy::iterator &Itr) {
uintptr_t UB = (uintptr_t)HstPtrBegin + DataSize;
Device.ShadowMtx.lock();
for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
Itr != Device.ShadowPtrMap.end();) {
void ShadowHstPtrAddr = (void )Itr->first;

// An STL map is sorted on its keys; use this property
// to quickly determine when to break out of the loop.
if ((uintptr_t)ShadowHstPtrAddr < LB) {
++Itr;
continue;
}
if ((uintptr_t)ShadowHstPtrAddr >= UB)
break;

// If we copied the struct to the host, we need to restore the pointer.		// If we copied the struct to the host, we need to restore the pointer.
if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {		if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
		void ShadowHstPtrAddr = (void )Itr->first;
		*ShadowHstPtrAddr = Itr->second.HstPtrVal;
DP("Restoring original host pointer value " DPxMOD " for host "		DP("Restoring original host pointer value " DPxMOD " for host "
"pointer " DPxMOD "\n",		"pointer " DPxMOD "\n",
DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));		DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
*ShadowHstPtrAddr = Itr->second.HstPtrVal;
}		}
// If the struct is to be deallocated, remove the shadow entry.		// If the struct is to be deallocated, remove the shadow entry.
if (DelEntry) {		if (DelEntry) {
DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));		DP("Removing shadow pointer " DPxMOD "\n",
		DPxPTR((void **)Itr->first));
Itr = Device.ShadowPtrMap.erase(Itr);		Itr = Device.ShadowPtrMap.erase(Itr);
} else {		} else {
++Itr;		++Itr;
}		}
}		return OFFLOAD_SUCCESS;
Device.ShadowMtx.unlock();		};
		applyToShadowMapEntries(Device, CB, HstPtrBegin, DataSize, TPR);

// Add pointer to the buffer for later deallocation		// Add pointer to the buffer for later deallocation
if (DelEntry && !IsHostPtr)		if (DelEntry && !IsHostPtr)
DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasHoldModifier);		DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasHoldModifier);
}		}
}		}

// TODO: We should not synchronize here but pass the AsyncInfo object to the		// TODO: We should not synchronize here but pass the AsyncInfo object to the
Show All 19 Lines	int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
return OFFLOAD_SUCCESS;		return OFFLOAD_SUCCESS;
}		}

static int targetDataContiguous(ident_t loc, DeviceTy &Device, void ArgsBase,		static int targetDataContiguous(ident_t loc, DeviceTy &Device, void ArgsBase,
void *HstPtrBegin, int64_t ArgSize,		void *HstPtrBegin, int64_t ArgSize,
int64_t ArgType, AsyncInfoTy &AsyncInfo) {		int64_t ArgType, AsyncInfoTy &AsyncInfo) {
TIMESCOPE_WITH_IDENT(loc);		TIMESCOPE_WITH_IDENT(loc);
bool IsLast, IsHostPtr;		bool IsLast, IsHostPtr;
void *TgtPtrBegin = Device.getTgtPtrBegin(		TargetPointerResultTy TPR = Device.getTgtPtrBegin(
HstPtrBegin, ArgSize, IsLast, /UpdateRefCount=/false,		HstPtrBegin, ArgSize, IsLast, /UpdateRefCount=/false,
/UseHoldRefCount=/false, IsHostPtr, /MustContain=/true);		/UseHoldRefCount=/false, IsHostPtr, /MustContain=/true);
		void *TgtPtrBegin = TPR.TargetPointer;
if (!TgtPtrBegin) {		if (!TgtPtrBegin) {
DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));		DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {		if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
MESSAGE("device mapping required by 'present' motion modifier does not "		MESSAGE("device mapping required by 'present' motion modifier does not "
"exist for host address " DPxMOD " (%" PRId64 " bytes)",		"exist for host address " DPxMOD " (%" PRId64 " bytes)",
DPxPTR(HstPtrBegin), ArgSize);		DPxPTR(HstPtrBegin), ArgSize);
return OFFLOAD_FAIL;		return OFFLOAD_FAIL;
}		}
Show All 10 Lines	if (ArgType & OMP_TGT_MAPTYPE_FROM) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",		DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));		ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo);		int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {		if (Ret != OFFLOAD_SUCCESS) {
REPORT("Copying data from device failed.\n");		REPORT("Copying data from device failed.\n");
return OFFLOAD_FAIL;		return OFFLOAD_FAIL;
}		}

uintptr_t LB = (uintptr_t)HstPtrBegin;		auto CB = [&](ShadowPtrListTy::iterator &Itr) {
uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;		void ShadowHstPtrAddr = (void )Itr->first;
Device.ShadowMtx.lock();		*ShadowHstPtrAddr = Itr->second.HstPtrVal;
for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
IT != Device.ShadowPtrMap.end(); ++IT) {
void ShadowHstPtrAddr = (void )IT->first;
if ((uintptr_t)ShadowHstPtrAddr < LB)
continue;
if ((uintptr_t)ShadowHstPtrAddr >= UB)
break;
DP("Restoring original host pointer value " DPxMOD		DP("Restoring original host pointer value " DPxMOD
" for host pointer " DPxMOD "\n",		" for host pointer " DPxMOD "\n",
DPxPTR(IT->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));		DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
*ShadowHstPtrAddr = IT->second.HstPtrVal;		return OFFLOAD_SUCCESS;
}		};
Device.ShadowMtx.unlock();		applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
}		}

if (ArgType & OMP_TGT_MAPTYPE_TO) {		if (ArgType & OMP_TGT_MAPTYPE_TO) {
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",		DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));		ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo);		int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {		if (Ret != OFFLOAD_SUCCESS) {
REPORT("Copying data to device failed.\n");		REPORT("Copying data to device failed.\n");
return OFFLOAD_FAIL;		return OFFLOAD_FAIL;
}		}

uintptr_t LB = (uintptr_t)HstPtrBegin;		auto CB = [&](ShadowPtrListTy::iterator &Itr) {
uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
Device.ShadowMtx.lock();
for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
IT != Device.ShadowPtrMap.end(); ++IT) {
void ShadowHstPtrAddr = (void )IT->first;
if ((uintptr_t)ShadowHstPtrAddr < LB)
continue;
if ((uintptr_t)ShadowHstPtrAddr >= UB)
break;
DP("Restoring original target pointer value " DPxMOD " for target "		DP("Restoring original target pointer value " DPxMOD " for target "
"pointer " DPxMOD "\n",		"pointer " DPxMOD "\n",
DPxPTR(IT->second.TgtPtrVal), DPxPTR(IT->second.TgtPtrAddr));		DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr));
Ret = Device.submitData(IT->second.TgtPtrAddr, &IT->second.TgtPtrVal,		Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal,
sizeof(void *), AsyncInfo);		sizeof(void *), AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {		if (Ret != OFFLOAD_SUCCESS)
REPORT("Copying data to device failed.\n");		REPORT("Copying data to device failed.\n");
Device.ShadowMtx.unlock();		return Ret;
return OFFLOAD_FAIL;		};
}		applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
}
Device.ShadowMtx.unlock();
}		}
return OFFLOAD_SUCCESS;		return OFFLOAD_SUCCESS;
}		}

static int targetDataNonContiguous(ident_t *loc, DeviceTy &Device,		static int targetDataNonContiguous(ident_t *loc, DeviceTy &Device,
void *ArgsBase,		void *ArgsBase,
__tgt_target_non_contig *NonContig,		__tgt_target_non_contig *NonContig,
uint64_t Size, int64_t ArgType,		uint64_t Size, int64_t ArgType,
▲ Show 20 Lines • Show All 366 Lines • ▼ Show 20 Lines	if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
void *HstPtrBase = Args[Idx];		void *HstPtrBase = Args[Idx];
bool IsLast, IsHostPtr; // IsLast is unused.		bool IsLast, IsHostPtr; // IsLast is unused.
void *TgtPtrBase =		void *TgtPtrBase =
(void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]);		(void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]);
DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));		DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;		uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);		void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();		void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
PointerTgtPtrBegin = Device.getTgtPtrBegin(		TargetPointerResultTy TPR = Device.getTgtPtrBegin(
HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false,		HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false, IsHostPtr);		/*UseHoldRefCount=*/false, IsHostPtr);
		PointerTgtPtrBegin = TPR.TargetPointer;
if (!PointerTgtPtrBegin) {		if (!PointerTgtPtrBegin) {
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",		DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
DPxPTR(HstPtrVal));		DPxPTR(HstPtrVal));
continue;		continue;
}		}
if (IsHostPtr) {		if (IsHostPtr) {
DP("Unified memory is active, no need to map lambda captured"		DP("Unified memory is active, no need to map lambda captured"
"variable (" DPxMOD ")\n",		"variable (" DPxMOD ")\n",
Show All 12 Lines	if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
continue;		continue;
}		}
void *HstPtrBegin = Args[I];		void *HstPtrBegin = Args[I];
void *HstPtrBase = ArgBases[I];		void *HstPtrBase = ArgBases[I];
void *TgtPtrBegin;		void *TgtPtrBegin;
map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];		map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
ptrdiff_t TgtBaseOffset;		ptrdiff_t TgtBaseOffset;
bool IsLast, IsHostPtr; // unused.		bool IsLast, IsHostPtr; // unused.
		TargetPointerResultTy TPR;
if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {		if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
DP("Forwarding first-private value " DPxMOD " to the target construct\n",		DP("Forwarding first-private value " DPxMOD " to the target construct\n",
DPxPTR(HstPtrBase));		DPxPTR(HstPtrBase));
TgtPtrBegin = HstPtrBase;		TgtPtrBegin = HstPtrBase;
TgtBaseOffset = 0;		TgtBaseOffset = 0;
} else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {		} else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;		TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);		const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);
Show All 9 Lines	if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
if (Ret != OFFLOAD_SUCCESS) {		if (Ret != OFFLOAD_SUCCESS) {
REPORT("Failed to process %sprivate argument " DPxMOD "\n",		REPORT("Failed to process %sprivate argument " DPxMOD "\n",
(IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));		(IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
return OFFLOAD_FAIL;		return OFFLOAD_FAIL;
}		}
} else {		} else {
if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)		if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);		HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,		TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
/*UpdateRefCount=*/false,		/*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false, IsHostPtr);		/*UseHoldRefCount=*/false, IsHostPtr);
		TgtPtrBegin = TPR.TargetPointer;
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;		TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
#ifdef OMPTARGET_DEBUG		#ifdef OMPTARGET_DEBUG
void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);		void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",		DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));		DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
#endif		#endif
}		}
TgtArgsPositions[I] = TgtArgs.size();		TgtArgsPositions[I] = TgtArgs.size();
▲ Show 20 Lines • Show All 139 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[OpenMP] Avoid costly shadow map traversals whenever possible
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 401489

openmp/libomptarget/include/device.h

openmp/libomptarget/src/api.cpp

openmp/libomptarget/src/device.cpp

openmp/libomptarget/src/omptarget.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[OpenMP] Avoid costly shadow map traversals whenever possible
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 401489

openmp/libomptarget/include/device.h

openmp/libomptarget/src/api.cpp

openmp/libomptarget/src/device.cpp

openmp/libomptarget/src/omptarget.cpp

[OpenMP] Avoid costly shadow map traversals whenever possible
ClosedPublic