Index: libomptarget/include/omptarget.h =================================================================== --- libomptarget/include/omptarget.h +++ libomptarget/include/omptarget.h @@ -47,6 +47,8 @@ OMP_TGT_MAPTYPE_LITERAL = 0x100, // mapping is implicit OMP_TGT_MAPTYPE_IMPLICIT = 0x200, + // close map modifier: force a device copy even under unified shared memory + OMP_TGT_MAPTYPE_CLOSE = 0x400, // member of struct, member given by [16 MSBs] - 1 OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000 }; Index: libomptarget/src/device.h =================================================================== --- libomptarget/src/device.h +++ libomptarget/src/device.h @@ -137,11 +137,13 @@ long getMapEntryRefCnt(void *HstPtrBegin); LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true); + bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true, + bool IsCloseModifier = false); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool &IsHostPtr); - int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete); + int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete, + bool IsCloseModifier = false); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int disassociatePtr(void *HstPtrBegin); Index: libomptarget/src/device.cpp =================================================================== --- libomptarget/src/device.cpp +++ libomptarget/src/device.cpp @@ -158,7 +158,7 @@ // to do an illegal mapping. void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit, - bool UpdateRefCount) { + bool UpdateRefCount, bool IsCloseModifier) { void *rc = NULL; IsHostPtr = false; DataMapMtx.lock(); @@ -170,7 +170,7 @@ // maps are respected. 
// TODO: In addition to the mapping rules above, when the close map // modifier is implemented, foce the mapping of the variable to the device. - if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !IsCloseModifier && (IsImplicit || !lr.Flags.IsContained || Size)) { DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n", DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : "")); @@ -200,7 +200,10 @@ } else if (Size) { // If it is not contained and Size > 0 we should create a new entry for it. IsNew = true; - uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin); + uintptr_t tp = 0; + if (!(RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && IsCloseModifier)) + tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin); DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", " "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp)); @@ -274,8 +277,9 @@ return NULL; } -int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) { - if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) +int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete, + bool IsCloseModifier) { + if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !IsCloseModifier) return OFFLOAD_SUCCESS; // Check if the pointer is contained in any sub-nodes. int rc; Index: libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -244,6 +244,9 @@ bool IsNew, Pointer_IsNew; bool IsHostPtr = false; bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; + // Force the creation of a device-side copy of the data when the close map + // modifier is present on this mapping (OMP_TGT_MAPTYPE_CLOSE is set). 
+ bool IsCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE; // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we // have reached this point via __tgt_target_data_begin and not __tgt_target // then no argument is marked as TARGET_PARAM ("omp target data map" is not @@ -254,7 +257,8 @@ DP("Has a pointer entry: \n"); // base is address of pointer. Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase, - sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef); + sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef, + IsCloseModifier); if (!Pointer_TgtPtrBegin) { DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " "illegal mapping).\n"); @@ -270,7 +274,7 @@ } void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, - data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef); + data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef, IsCloseModifier); if (!TgtPtrBegin && data_size) { // If data_size==0, then the argument could be a zero-length pointer to // NULL, so getOrAlloc() returning NULL is not an error. @@ -290,8 +294,11 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { bool copy = false; - if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { - if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { + bool Close = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE; + if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + Close)) { + if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS) || Close) { copy = true; } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) { // Copy data only if the "parent" struct has RefCount==1. 
@@ -370,6 +377,7 @@ bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) || (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ); bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE; + bool IsCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE; // If PTR_AND_OBJ, HstPtrBegin is address of pointee void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast, @@ -389,8 +397,11 @@ // Move data back to the host if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; + bool Close = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE; bool CopyMember = false; - if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { + if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + Close)) { if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { // Copy data only if the "parent" struct has RefCount==1. @@ -403,9 +414,9 @@ } } - if ((DelEntry || Always || CopyMember) && + if ((DelEntry || Always || CopyMember || Close) && !(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && - TgtPtrBegin == HstPtrBegin)) { + TgtPtrBegin == HstPtrBegin && !Close)) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); @@ -455,7 +466,8 @@ // Deallocate map if (DelEntry) { - int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete); + int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete, + IsCloseModifier); if (rt != OFFLOAD_SUCCESS) { DP("Deallocating data from device failed.\n"); return OFFLOAD_FAIL; Index: libomptarget/test/unified_shared_memory/close_modifier.c =================================================================== --- /dev/null +++ libomptarget/test/unified_shared_memory/close_modifier.c @@ -0,0 +1,131 @@ +// RUN: 
%libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include <omp.h> +#include <stdio.h> + +#pragma omp requires unified_shared_memory + +#define N 1024 + +int main(int argc, char *argv[]) { + int fails; + long long host_alloc, device_alloc; + long long host_data, device_data; + double *alloc = (double *)malloc(N*sizeof(double)); + double data[N]; + + for(int i=0; i