diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -283,8 +283,14 @@ bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr, bool MustContain = false, bool ForceDelete = false); - int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool HasCloseModifier, - bool HasHoldModifier); + /// For the map entry for \p HstPtrBegin, decrement the reference count + /// specified by \p HasHoldModifier and, if the the total reference count is + /// then zero, deallocate the corresponding device storage and remove the map + /// entry. Return \c OFFLOAD_SUCCESS if the map entry existed, and return + /// \c OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling + /// this function if the map entry is not expected to exist because + /// \p HstPtrBegin uses shared memory. + int deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool HasHoldModifier); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int disassociatePtr(void *HstPtrBegin); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -387,10 +387,7 @@ } int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, - bool HasCloseModifier, bool HasHoldModifier) { - if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && - !HasCloseModifier) - return OFFLOAD_SUCCESS; + bool HasHoldModifier) { // Check if the pointer is contained in any sub-nodes. int rc; DataMapMtx.lock(); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -609,15 +609,11 @@ void *HstPtrBegin; /// Size of the data int64_t DataSize; - /// Whether it has \p close modifier - bool HasCloseModifier; /// Whether it has \p ompx_hold modifier bool HasHoldModifier; - DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasCloseModifier, - bool HasHoldModifier) - : HstPtrBegin(HstPtr), DataSize(Size), HasCloseModifier(HasCloseModifier), - HasHoldModifier(HasHoldModifier) {} + DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasHoldModifier) + : HstPtrBegin(HstPtr), DataSize(Size), HasHoldModifier(HasHoldModifier) {} }; } // namespace @@ -682,7 +678,6 @@ (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) && !(FromMapper && I == 0); bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE; - bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE; bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT; bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD; @@ -743,15 +738,12 @@ if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) { bool Always = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; bool CopyMember = false; - if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || - HasCloseModifier) { + if (!IsHostPtr) { if (IsLast) CopyMember = true; } - if ((DelEntry || Always || CopyMember) && - !(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && - TgtPtrBegin == HstPtrBegin)) { + if ((DelEntry || Always || CopyMember) && !IsHostPtr) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, @@ -805,9 +797,8 @@ Device.ShadowMtx.unlock(); // Add pointer to the buffer for later deallocation - if (DelEntry) - DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasCloseModifier, - HasHoldModifier); + if (DelEntry && !IsHostPtr) + DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasHoldModifier); } } @@ -824,7 +815,7 @@ if (FromMapperBase && FromMapperBase == Info.HstPtrBegin) continue; Ret = Device.deallocTgtPtr(Info.HstPtrBegin, Info.DataSize, - Info.HasCloseModifier, Info.HasHoldModifier); + Info.HasHoldModifier); if (Ret != OFFLOAD_SUCCESS) { REPORT("Deallocating data from device failed.\n"); return OFFLOAD_FAIL; diff --git a/openmp/libomptarget/test/unified_shared_memory/associate_ptr.c b/openmp/libomptarget/test/unified_shared_memory/associate_ptr.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/associate_ptr.c @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +// REQUIRES: unified_shared_memory +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +// Fails on amdgcn with error: GPU Memory Error +// XFAIL: amdgcn-amd-amdhsa + +#include +#include +#include + +#pragma omp requires unified_shared_memory + +int main(int argc, char *argv[]) { + int dev = omp_get_default_device(); + int x = 10; + int *x_dev = (int *)omp_target_alloc(sizeof x, dev); + assert(x_dev && "expected omp_target_alloc to succeed"); + int rc = omp_target_associate_ptr(&x, x_dev, sizeof x, 0, dev); + assert(!rc && "expected omp_target_associate_ptr to succeed"); + + // To determine whether x needs to be transfered, the runtime cannot simply + // check whether unified shared memory is enabled and the 'close' modifier is + // specified. It must check whether x was previously placed in device memory + // by, for example, omp_target_associate_ptr. + #pragma omp target map(always, tofrom: x) + x = 20; + + // CHECK: x=20 + printf("x=%d\n", x); + // CHECK: present: 1 + printf("present: %d\n", omp_target_is_present(&x, dev)); + + return 0; +} diff --git a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c --- a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c +++ b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c @@ -17,6 +17,7 @@ int fails; void *host_alloc = 0, *device_alloc = 0; int *a = (int *)malloc(N * sizeof(int)); + int dev = omp_get_default_device(); // Init for (int i = 0; i < N; ++i) { @@ -79,14 +80,25 @@ #pragma omp target enter data map(close, to : a[ : N]) #pragma omp target map(from : device_alloc) - { device_alloc = &a[0]; } + { + device_alloc = &a[0]; + a[0] = 99; + } + // 'close' is missing, so the runtime must check whether s is actually in + // shared memory in order to determine whether to transfer data and delete the + // allocation. #pragma omp target exit data map(from : a[ : N]) // CHECK: a has been mapped to the device. if (device_alloc != host_alloc) printf("a has been mapped to the device.\n"); + // CHECK: a[0]=99 + // CHECK: a is present: 0 + printf("a[0]=%d\n", a[0]); + printf("a is present: %d\n", omp_target_is_present(a, dev)); + free(a); // CHECK: Done! diff --git a/openmp/libomptarget/test/unified_shared_memory/close_member.c b/openmp/libomptarget/test/unified_shared_memory/close_member.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/close_member.c @@ -0,0 +1,44 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +// REQUIRES: unified_shared_memory +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +// Fails on amdgcn with error: GPU Memory Error +// XFAIL: amdgcn-amd-amdhsa + +#include +#include + +#pragma omp requires unified_shared_memory + +struct S { + int x; + int y; +}; + +int main(int argc, char *argv[]) { + int dev = omp_get_default_device(); + struct S s = {10, 20}; + + #pragma omp target enter data map(close, to: s) + #pragma omp target map(alloc: s) + { + s.x = 11; + s.y = 21; + } + // To determine whether x needs to be transfered or deleted, the runtime + // cannot simply check whether unified shared memory is enabled and the + // 'close' modifier is specified. It must check whether x was previously + // placed in device memory by, for example, a 'close' modifier that isn't + // specified here. The following struct member case checks a special code + // path in the runtime implementation where members are transferred before + // deletion of the struct. + #pragma omp target exit data map(from: s.x, s.y) + + // CHECK: s.x=11, s.y=21 + printf("s.x=%d, s.y=%d\n", s.x, s.y); + // CHECK: present: 0 + printf("present: %d\n", omp_target_is_present(&s, dev)); + + return 0; +}