Index: openmp/libomptarget/src/device.h =================================================================== --- openmp/libomptarget/src/device.h +++ openmp/libomptarget/src/device.h @@ -228,8 +228,7 @@ void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool &IsHostPtr, bool MustContain = false, bool ForceDelete = false); - int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, - bool HasCloseModifier = false); + int deallocTgtPtr(void *TgtPtrBegin, int64_t Size); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int disassociatePtr(void *HstPtrBegin); Index: openmp/libomptarget/src/device.cpp =================================================================== --- openmp/libomptarget/src/device.cpp +++ openmp/libomptarget/src/device.cpp @@ -365,11 +365,7 @@ return NULL; } -int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, - bool HasCloseModifier) { - if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && - !HasCloseModifier) - return OFFLOAD_SUCCESS; +int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size) { // Check if the pointer is contained in any sub-nodes. int rc; DataMapMtx.lock(); Index: openmp/libomptarget/src/omptarget.cpp =================================================================== --- openmp/libomptarget/src/omptarget.cpp +++ openmp/libomptarget/src/omptarget.cpp @@ -606,12 +606,9 @@ void *HstPtrBegin; /// Size of the data int64_t DataSize; - /// Whether it has \p close modifier - bool HasCloseModifier; - DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasCloseModifier) - : HstPtrBegin(HstPtr), DataSize(Size), - HasCloseModifier(HasCloseModifier) {} + DeallocTgtPtrInfo(void *HstPtr, int64_t Size) + : HstPtrBegin(HstPtr), DataSize(Size) {} }; } // namespace @@ -676,7 +673,6 @@ (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) && !(FromMapper && I == 0); bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE; - bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE; bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT; // If PTR_AND_OBJ, HstPtrBegin is address of pointee @@ -736,15 +732,12 @@ if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) { bool Always = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; bool CopyMember = false; - if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || - HasCloseModifier) { + if (!IsHostPtr) { if (IsLast) CopyMember = true; } - if ((DelEntry || Always || CopyMember) && - !(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && - TgtPtrBegin == HstPtrBegin)) { + if ((DelEntry || Always || CopyMember) && !IsHostPtr) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, @@ -798,8 +791,8 @@ Device.ShadowMtx.unlock(); // Add pointer to the buffer for later deallocation - if (DelEntry) - DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasCloseModifier); + if (DelEntry && !IsHostPtr) + DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize); } } @@ -815,8 +808,7 @@ for (DeallocTgtPtrInfo &Info : DeallocTgtPtrs) { if (FromMapperBase && FromMapperBase == Info.HstPtrBegin) continue; - Ret = Device.deallocTgtPtr(Info.HstPtrBegin, Info.DataSize, - Info.HasCloseModifier); + Ret = Device.deallocTgtPtr(Info.HstPtrBegin, Info.DataSize); if (Ret != OFFLOAD_SUCCESS) { REPORT("Deallocating data from device failed.\n"); return OFFLOAD_FAIL; Index: openmp/libomptarget/test/unified_shared_memory/associate_ptr.c =================================================================== --- /dev/null +++ openmp/libomptarget/test/unified_shared_memory/associate_ptr.c @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +// REQUIRES: unified_shared_memory +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +// Fails on amdgcn with error: GPU Memory Error +// XFAIL: amdgcn-amd-amdhsa + +#include +#include +#include + +#pragma omp requires unified_shared_memory + +int main(int argc, char *argv[]) { + int dev = omp_get_default_device(); + int x = 10; + int *x_dev = (int *)omp_target_alloc(sizeof x, dev); + assert(x_dev && "expected omp_target_alloc to succeed"); + int rc = omp_target_associate_ptr(&x, x_dev, sizeof x, 0, dev); + assert(!rc && "expected omp_target_associate_ptr to succeed"); + + // To determine whether x needs to be transfered, the runtime cannot simply + // check whether unified shared memory is enabled and the 'close' modifier is + // specified. It must check whether x was previously placed in device memory + // by, for example, omp_target_associate_ptr. + #pragma omp target map(always, tofrom: x) + x = 20; + + // CHECK: x=20 + printf("x=%d\n", x); + // CHECK: present: 1 + printf("present: %d\n", omp_target_is_present(&x, dev)); + + return 0; +} Index: openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c =================================================================== --- openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c +++ openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c @@ -17,6 +17,7 @@ int fails; void *host_alloc = 0, *device_alloc = 0; int *a = (int *)malloc(N * sizeof(int)); + int dev = omp_get_default_device(); // Init for (int i = 0; i < N; ++i) { @@ -79,14 +80,25 @@ #pragma omp target enter data map(close, to : a[ : N]) #pragma omp target map(from : device_alloc) - { device_alloc = &a[0]; } + { + device_alloc = &a[0]; + a[0] = 99; + } + // 'close' is missing, so the runtime must check whether s is actually in + // shared memory in order to determine whether to transfer data and delete the + // allocation. #pragma omp target exit data map(from : a[ : N]) // CHECK: a has been mapped to the device. if (device_alloc != host_alloc) printf("a has been mapped to the device.\n"); + // CHECK: a[0]=99 + // CHECK: a is present: 0 + printf("a[0]=%d\n", a[0]); + printf("a is present: %d\n", omp_target_is_present(a, dev)); + free(a); // CHECK: Done! Index: openmp/libomptarget/test/unified_shared_memory/close_member.c =================================================================== --- /dev/null +++ openmp/libomptarget/test/unified_shared_memory/close_member.c @@ -0,0 +1,44 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +// REQUIRES: unified_shared_memory +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +// Fails on amdgcn with error: GPU Memory Error +// XFAIL: amdgcn-amd-amdhsa + +#include +#include + +#pragma omp requires unified_shared_memory + +struct S { + int x; + int y; +}; + +int main(int argc, char *argv[]) { + int dev = omp_get_default_device(); + struct S s = {10, 20}; + + #pragma omp target enter data map(close, to: s) + #pragma omp target map(alloc: s) + { + s.x = 11; + s.y = 21; + } + // To determine whether x needs to be transfered or deleted, the runtime + // cannot simply check whether unified shared memory is enabled and the + // 'close' modifier is specified. It must check whether x was previously + // placed in device memory by, for example, a 'close' modifier that isn't + // specified here. The following struct member case checks a special code + // path in the runtime implementation where members are transferred before + // deletion of the struct. + #pragma omp target exit data map(from: s.x, s.y) + + // CHECK: s.x=11, s.y=21 + printf("s.x=%d, s.y=%d\n", s.x, s.y); + // CHECK: present: 0 + printf("present: %d\n", omp_target_is_present(&s, dev)); + + return 0; +}