Index: libomptarget/src/api.cpp =================================================================== --- libomptarget/src/api.cpp +++ libomptarget/src/api.cpp @@ -113,7 +113,15 @@ DeviceTy& Device = Devices[device_num]; bool IsLast; // not used - int rc = (Device.getTgtPtrBegin(ptr, 0, IsLast, false) != NULL); + bool IsHostPtr; + void *TgtPtr = Device.getTgtPtrBegin(ptr, 0, IsLast, false, IsHostPtr); + int rc = (TgtPtr != NULL); + // Under unified memory the host pointer can be returned by the + // getTgtPtrBegin() function which means that there is no device + // corresponding point for ptr. This function should return false + // in that situation. + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) + rc = !IsHostPtr; DP("Call to omp_target_is_present returns %d\n", rc); return rc; } Index: libomptarget/src/device.h =================================================================== --- libomptarget/src/device.h +++ libomptarget/src/device.h @@ -137,10 +137,10 @@ long getMapEntryRefCnt(void *HstPtrBegin); LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - bool &IsNew, bool IsImplicit, bool UpdateRefCount = true); + bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount); + bool UpdateRefCount, bool &IsHostPtr); int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int disassociatePtr(void *HstPtrBegin); Index: libomptarget/src/device.cpp =================================================================== --- libomptarget/src/device.cpp +++ libomptarget/src/device.cpp @@ -157,41 +157,66 @@ // If NULL is returned, then either data allocation failed or the user tried // to do an illegal mapping. 
void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, - int64_t Size, bool &IsNew, bool IsImplicit, bool UpdateRefCount) { + int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit, + bool UpdateRefCount) { void *rc = NULL; DataMapMtx.lock(); LookupResult lr = lookupMapping(HstPtrBegin, Size); - // Check if the pointer is contained. - if (lr.Flags.IsContained || - ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) { + // If unified shared memory is active, implicitly mapped variables that are not + // privatized use host address. Any explicitly mapped variables also use + // host address where correctness is not impeded. In all other cases + // maps are respected. + // TODO: In addition to the mapping rules above, when the close map + // modifier is implemented, force the mapping of the variable to the device. + if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + (IsImplicit || !((lr.Flags.IsContained || + lr.Flags.ExtendsBefore || + lr.Flags.ExtendsAfter) && !Size))) { auto &HT = *lr.Entry; - IsNew = false; - - if (UpdateRefCount) - ++HT.RefCount; - - uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); - DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " - "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""), - DPxPTR(HstPtrBegin), DPxPTR(tp), Size, - (UpdateRefCount ? " updated" : ""), - (CONSIDERED_INF(HT.RefCount)) ? "INF" : - std::to_string(HT.RefCount).c_str()); - rc = (void *)tp; - } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) { - // Explicit extension of mapped data - not allowed. - DP("Explicit extension of mapping is not allowed.\n"); - } else if (Size) { - // If it is not contained and Size > 0 we should create a new entry for it.
- IsNew = true; - uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin); - DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", " - "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase), - DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp)); - HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase, - (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp)); + uintptr_t tp = HT.HstPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n", DPxPTR(tp), + Size, (UpdateRefCount ? " updated" : "")); + IsHostPtr = true; rc = (void *)tp; + } else { + // Check if the pointer is contained. + if (lr.Flags.IsContained || + ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) { + auto &HT = *lr.Entry; + IsNew = false; + + if (UpdateRefCount) + ++HT.RefCount; + + uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + DP("Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " + "Size=%ld,%s RefCount=%s\n", (IsImplicit ? " (implicit)" : ""), + DPxPTR(HstPtrBegin), DPxPTR(tp), Size, + (UpdateRefCount ? " updated" : ""), + (CONSIDERED_INF(HT.RefCount)) ? "INF" : + std::to_string(HT.RefCount).c_str()); + rc = (void *)tp; + } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) { + // Explicit extension of mapped data - not allowed. + DP("Explicit extension of mapping is not allowed.\n"); + } else if (Size) { + // If it is not contained and Size > 0 we should create a new entry for it. + IsNew = true; + uintptr_t tp = (uintptr_t)NULL; + // If the use_device_ptr clause is used, we create a copy of the variable + // on the device even in the unified shared memory case. + // The explicit usage of the use_device_ptr clause forces the variable to + // have a device side version. 
+ if (!(RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) + tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin); + DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", " + "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase), + DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp)); + HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase, + (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp)); + rc = (void *)tp; + } } DataMapMtx.unlock(); @@ -202,27 +227,44 @@ // Return the target pointer begin (where the data will be moved). // Decrement the reference counter if called from target_data_end. void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount) { + bool UpdateRefCount, bool &IsHostPtr) { void *rc = NULL; DataMapMtx.lock(); LookupResult lr = lookupMapping(HstPtrBegin, Size); - - if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) { + IsHostPtr = false; + + // If the value isn't found in the mapping and unified shared memory + // is on then it means we have stumbled upon an implicitly mapped value + // which we need to use directly from the host. + if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + !(lr.Flags.IsContained || + lr.Flags.ExtendsBefore || + lr.Flags.ExtendsAfter)) { auto &HT = *lr.Entry; - IsLast = !(HT.RefCount > 1); - - if (HT.RefCount > 1 && UpdateRefCount) - --HT.RefCount; - - uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); - DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " - "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size, - (UpdateRefCount ? " updated" : ""), - (CONSIDERED_INF(HT.RefCount)) ?
"INF" : - std::to_string(HT.RefCount).c_str()); + IsLast = false; + uintptr_t tp = HT.HstPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n", DPxPTR(tp), + Size, (UpdateRefCount ? " updated" : "")); + IsHostPtr = true; rc = (void *)tp; } else { - IsLast = false; + if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) { + auto &HT = *lr.Entry; + IsLast = !(HT.RefCount > 1); + + if (HT.RefCount > 1 && UpdateRefCount) + --HT.RefCount; + + uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + DP("Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " + "Size=%ld,%s RefCount=%s\n", DPxPTR(HstPtrBegin), DPxPTR(tp), Size, + (UpdateRefCount ? " updated" : ""), + (CONSIDERED_INF(HT.RefCount)) ? "INF" : + std::to_string(HT.RefCount).c_str()); + rc = (void *)tp; + } else { + IsLast = false; + } } DataMapMtx.unlock(); @@ -244,6 +286,8 @@ } int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) { + if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) + return OFFLOAD_SUCCESS; // Check if the pointer is contained in any sub-nodes. int rc; DataMapMtx.lock(); Index: libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -242,6 +242,7 @@ // Address of pointer on the host and device, respectively. void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin; bool IsNew, Pointer_IsNew; + bool IsHostPtr = false; bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we // have reached this point via __tgt_target_data_begin and not __tgt_target @@ -253,7 +254,7 @@ DP("Has a pointer entry: \n"); // base is address of pointer. 
Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase, - sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef); + sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef); if (!Pointer_TgtPtrBegin) { DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " "illegal mapping).\n"); @@ -269,7 +270,7 @@ } void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, - data_size, IsNew, IsImplicit, UpdateRef); + data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef); if (!TgtPtrBegin && data_size) { // If data_size==0, then the argument could be a zero-length pointer to // NULL, so getOrAlloc() returning NULL is not an error. @@ -289,19 +290,21 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { bool copy = false; - if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { - copy = true; - } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) { - // Copy data only if the "parent" struct has RefCount==1. - int32_t parent_idx = member_of(arg_types[i]); - long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); - assert(parent_rc > 0 && "parent struct not found"); - if (parent_rc == 1) { + if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { + if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { copy = true; + } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) { + // Copy data only if the "parent" struct has RefCount==1. 
+ int32_t parent_idx = member_of(arg_types[i]); + long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); + assert(parent_rc > 0 && "parent struct not found"); + if (parent_rc == 1) { + copy = true; + } } } - if (copy) { + if (copy && !IsHostPtr) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size); @@ -312,7 +315,7 @@ } } - if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { + if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; @@ -363,14 +366,14 @@ } } - bool IsLast; + bool IsLast, IsHostPtr; bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) || (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ); bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE; // If PTR_AND_OBJ, HstPtrBegin is address of pointee void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast, - UpdateRef); + UpdateRef, IsHostPtr); DP("There are %" PRId64 " bytes allocated at target address " DPxMOD " - is%s last\n", data_size, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not")); @@ -387,21 +390,28 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; bool CopyMember = false; - if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && - !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { - // Copy data only if the "parent" struct has RefCount==1. 
- int32_t parent_idx = member_of(arg_types[i]); - long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); - assert(parent_rc > 0 && "parent struct not found"); - if (parent_rc == 1) { - CopyMember = true; + if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { + if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && + !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { + // Copy data only if the "parent" struct has RefCount==1. + int32_t parent_idx = member_of(arg_types[i]); + long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); + assert(parent_rc > 0 && "parent struct not found"); + if (parent_rc == 1) { + CopyMember = true; + } } } if (DelEntry || Always || CopyMember) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); + int rt; + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) + rt = OFFLOAD_SUCCESS; + else + rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); if (rt != OFFLOAD_SUCCESS) { DP("Copying data from device failed.\n"); return OFFLOAD_FAIL; @@ -471,9 +481,9 @@ void *HstPtrBegin = args[i]; int64_t MapSize = arg_sizes[i]; - bool IsLast; + bool IsLast, IsHostPtr; void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, - false); + false, IsHostPtr); if (!TgtPtrBegin) { DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); continue; @@ -482,7 +492,14 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); + int rt; + // Act as if a copy from device was successful in the case of + // unified memory where only a host version of the data exists.
+ if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) + rt = OFFLOAD_SUCCESS; + else + rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); if (rt != OFFLOAD_SUCCESS) { DP("Copying data from device failed.\n"); return OFFLOAD_FAIL; @@ -509,7 +526,16 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); + int rt; + // No need to actually copy any data to device. The data is available + // on the host and can be accessed by the device via unified memory. + // When host and device pointers are equal it means that this is called + // internally by the runtime and not directly by the user via the API. + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) + rt = OFFLOAD_SUCCESS; + else + rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); return OFFLOAD_FAIL; @@ -527,8 +553,12 @@ DP("Restoring original target pointer value " DPxMOD " for target " "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr)); - rt = Device.data_submit(it->second.TgtPtrAddr, - &it->second.TgtPtrVal, sizeof(void *)); + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) + rt = OFFLOAD_SUCCESS; + else + rt = Device.data_submit(it->second.TgtPtrAddr, + &it->second.TgtPtrVal, sizeof(void *)); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); Device.ShadowMtx.unlock(); @@ -640,14 +670,15 @@ void *HstPtrVal = args[i]; void *HstPtrBegin = args_base[i]; void *HstPtrBase = args[idx]; - bool IsLast; // unused. + bool IsLast, IsHostPtr; // unused. 
void *TgtPtrBase = (void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]); DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); void *Pointer_TgtPtrBegin = - Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false); + Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false, + IsHostPtr); if (!Pointer_TgtPtrBegin) { DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", DPxPTR(HstPtrVal)); @@ -655,8 +686,13 @@ } DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); - int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin, - sizeof(void *)); + int rt; + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) + rt = OFFLOAD_SUCCESS; + else + rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin, + sizeof(void *)); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); return OFFLOAD_FAIL; @@ -668,7 +704,7 @@ void *HstPtrBase = args_base[i]; void *TgtPtrBegin; ptrdiff_t TgtBaseOffset; - bool IsLast; // unused. + bool IsLast, IsHostPtr; // unused. if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) { DP("Forwarding first-private value " DPxMOD " to the target construct\n", DPxPTR(HstPtrBase)); @@ -705,14 +741,14 @@ } } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, - false); + false, IsHostPtr); TgtBaseOffset = 0; // no offset for ptrs. 
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to " "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), DPxPTR(HstPtrBase)); } else { TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, - false); + false, IsHostPtr); TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; #ifdef OMPTARGET_DEBUG void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); Index: libomptarget/test/api/api.c =================================================================== --- /dev/null +++ libomptarget/test/api/api.c @@ -0,0 +1,149 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#pragma omp requires unified_shared_memory + +#define N 1024 + +void init(double A[], double B[], double C[]) { + for(int i=0; i +#include + +#pragma omp requires unified_shared_memory + +#define N 1024 + +int main(int argc, char *argv[]) { + int fails; + long long host_alloc, device_alloc; + long long host_data, device_data; + double *alloc = (double *)malloc(N*sizeof(double)); + double data[N]; + + for(int i=0; i