diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -245,11 +245,12 @@ unsigned IsContained : 1; unsigned ExtendsBefore : 1; unsigned ExtendsAfter : 1; + unsigned OnlyBaseFound : 1; } Flags; HostDataToTargetListTy::iterator Entry; - LookupResult() : Flags({0, 0, 0}), Entry() {} + LookupResult() : Flags({0, 0, 0, 0}), Entry() {} }; /// This struct will be returned by \p DeviceTy::getTargetPointer which provides @@ -315,7 +316,7 @@ // Return true if data can be copied to DstDevice directly bool isDataExchangable(const DeviceTy &DstDevice); - LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); + LookupResult lookupMapping(void *HstPtrBase, void *HstPtrBegin, int64_t Size); /// Get the target pointer based on host pointer begin and base. If the /// mapping already exists, the target pointer will be returned directly. In /// addition, if required, the memory region pointed by \p HstPtrBegin of size @@ -333,8 +334,9 @@ bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); - TargetPointerResultTy getTgtPtrBegin(void *HstPtrBegin, int64_t Size, - bool &IsLast, bool UpdateRefCount, + TargetPointerResultTy getTgtPtrBegin(void *HstPtrBegin, void *HstPtrBase, + int64_t Size, bool &IsLast, + bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr, bool MustContain = false, bool ForceDelete = false); diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -108,8 +108,8 @@ bool IsLast; // not used bool IsHostPtr; TargetPointerResultTy TPR = - Device.getTgtPtrBegin(const_cast(ptr), 0, IsLast, - /*UpdateRefCount=*/false, + Device.getTgtPtrBegin(const_cast(ptr), const_cast(ptr), 0, + IsLast, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false, IsHostPtr); int rc = (TPR.TargetPointer != NULL); // Under unified memory the host pointer can be returned by the diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -137,8 +137,10 @@ return OFFLOAD_FAIL; } -LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) { +LookupResult DeviceTy::lookupMapping(void *HstPtrBase, void *HstPtrBegin, + int64_t Size) { uintptr_t hp = (uintptr_t)HstPtrBegin; + uintptr_t hb = (uintptr_t)HstPtrBase; LookupResult lr; DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n", @@ -157,11 +159,27 @@ (hp + Size) <= HT.HstPtrEnd; // Does it extend beyond the mapped region? lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd; + // Did we just find the base? + // There is a special case where we map a struct via a double pointer, e.g. + // typedef struct {...} S; + // S s; + // S *sp = &s; + // S **spp = &sp; + // #pragma omp target map (to: spp[0][0]) {...} + // In that case, the record for pointer sp will share the same base as the + // record for struct s, but we shouldn't report that we only found the base, + // i.e. we must let libomptarget create a new record in HostDataToTargetMap + // for the struct s itself which will have the same base as the record for + // pointer sp. This scenario can only happen in the left bin. + if (!lr.Flags.IsContained && !lr.Flags.ExtendsAfter && + hb == HT.HstPtrBase && + HT.HstPtrEnd - HT.HstPtrBegin != sizeof(void *)) + lr.Flags.OnlyBaseFound = true; } // check the right bin - if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) && - upper != HostDataToTargetMap.end()) { + if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter || lr.Flags.OnlyBaseFound) + && upper != HostDataToTargetMap.end()) { lr.Entry = upper; auto &HT = *lr.Entry; // Does it extend into an already mapped region? @@ -169,6 +187,10 @@ hp < HT.HstPtrBegin && (hp + Size) > HT.HstPtrBegin; // Does it extend beyond the mapped region? lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd; + // Did we just find the base? + if (!lr.Flags.ExtendsBefore && !lr.Flags.ExtendsAfter && + hb == HT.HstPtrBase) + lr.Flags.OnlyBaseFound = true; } if (lr.Flags.ExtendsBefore) { @@ -196,7 +218,7 @@ DataMapMtx.lock(); - LookupResult LR = lookupMapping(HstPtrBegin, Size); + LookupResult LR = lookupMapping(HstPtrBase, HstPtrBegin, Size); auto Entry = LR.Entry; // Check if the pointer is contained. @@ -204,7 +226,8 @@ // lead to the IsContained flag to be true - then we must ensure that the // device address is returned even under unified memory conditions. if (LR.Flags.IsContained || - ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) { + ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit) || + (LR.Flags.OnlyBaseFound && !Size)) { auto &HT = *LR.Entry; const char *RefCountAction; assert(HT.getTotalRefCount() > 0 && "expected existing RefCount > 0"); @@ -219,7 +242,14 @@ } const char *DynRefCountAction = HasHoldModifier ? "" : RefCountAction; const char *HoldRefCountAction = HasHoldModifier ? RefCountAction : ""; - uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + + uintptr_t Ptr = 0; + if (LR.Flags.OnlyBaseFound) + // Return the implied device base + Ptr = HT.TgtPtrBegin - (HT.HstPtrBegin - HT.HstPtrBase); + else + // Return the device address corresponding to HstPtrBegin + Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID, "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n", @@ -240,6 +270,13 @@ MESSAGE("device mapping required by 'present' map type modifier does not " "exist for host address " DPxMOD " (%" PRId64 " bytes)", DPxPTR(HstPtrBegin), Size); + } else if (LR.Flags.OnlyBaseFound && Size) { + // If we only found the base address but a size has been provided, it + // means that the user tried to map another distinct chunk of an already + // partially-mapped object, which is illegal. + MESSAGE("Found record of existing mapping with the requested base address " + DPxMOD " but disjoint mapped data, mapping of two distinct chunks " + "of the same object is not allowed.", DPxPTR(HstPtrBase)); } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) { // If unified shared memory is active, implicitly mapped variables that are @@ -336,18 +373,20 @@ // Return the target pointer begin (where the data will be moved). // Decrement the reference counter if called from targetDataEnd. TargetPointerResultTy -DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount, bool UseHoldRefCount, - bool &IsHostPtr, bool MustContain, bool ForceDelete) { +DeviceTy::getTgtPtrBegin(void *HstPtrBegin, void *HstPtrBase, int64_t Size, + bool &IsLast, bool UpdateRefCount, + bool UseHoldRefCount, bool &IsHostPtr, + bool MustContain, bool ForceDelete) { void *TargetPointer = NULL; bool IsNew = false; IsHostPtr = false; IsLast = false; DataMapMtx.lock(); - LookupResult lr = lookupMapping(HstPtrBegin, Size); + LookupResult lr = lookupMapping(HstPtrBase, HstPtrBegin, Size); if (lr.Flags.IsContained || - (!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter))) { + (!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter)) || + lr.Flags.OnlyBaseFound) { auto &HT = *lr.Entry; // We do not zero the total reference count here. deallocTgtPtr does that // atomically with removing the mapping. Otherwise, before this thread @@ -376,7 +415,14 @@ } const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction; const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : ""; - uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + + uintptr_t tp = 0; + if (lr.Flags.OnlyBaseFound) + // Return the implied device base + tp = HT.TgtPtrBegin - (HT.HstPtrBegin - HT.HstPtrBase); + else + // Return the device address corresponding to HstPtrBegin + tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID, "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n", @@ -402,7 +448,7 @@ // Lock-free version called when loading global symbols from the fat binary. void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) { uintptr_t hp = (uintptr_t)HstPtrBegin; - LookupResult lr = lookupMapping(HstPtrBegin, Size); + LookupResult lr = lookupMapping(nullptr, HstPtrBegin, Size); if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) { auto &HT = *lr.Entry; uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin); @@ -417,7 +463,7 @@ // Check if the pointer is contained in any sub-nodes. int Ret = OFFLOAD_SUCCESS; DataMapMtx.lock(); - LookupResult lr = lookupMapping(HstPtrBegin, Size); + LookupResult lr = lookupMapping(nullptr, HstPtrBegin, Size); if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) { auto &HT = *lr.Entry; if (HT.decRefCount(HasHoldModifier) == 0) { @@ -496,7 +542,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, AsyncInfoTy &AsyncInfo) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - LookupResult LR = lookupMapping(HstPtrBegin, Size); + LookupResult LR = lookupMapping(nullptr, HstPtrBegin, Size); auto *HT = &*LR.Entry; INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, @@ -518,7 +564,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size, AsyncInfoTy &AsyncInfo) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - LookupResult LR = lookupMapping(HstPtrBegin, Size); + LookupResult LR = lookupMapping(nullptr, HstPtrBegin, Size); auto *HT = &*LR.Entry; INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -696,6 +696,7 @@ } void *HstPtrBegin = Args[I]; + void *HstPtrBase = ArgBases[I]; int64_t DataSize = ArgSizes[I]; // Adjust for proper alignment if this is a combined entry (for structs). // Look at the next argument - if that is MEMBER_OF this one, then this one @@ -724,8 +725,8 @@ // If PTR_AND_OBJ, HstPtrBegin is address of pointee TargetPointerResultTy TPR = Device.getTgtPtrBegin( - HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr, - !IsImplicit, ForceDelete); + HstPtrBegin, HstPtrBase, DataSize, IsLast, UpdateRef, HasHoldModifier, + IsHostPtr, !IsImplicit, ForceDelete); void *TgtPtrBegin = TPR.TargetPointer; if (!TgtPtrBegin && (DataSize || HasPresentModifier)) { DP("Mapping does not exist (%s)\n", @@ -855,7 +856,7 @@ TIMESCOPE_WITH_IDENT(loc); bool IsLast, IsHostPtr; TargetPointerResultTy TPR = Device.getTgtPtrBegin( - HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false, + HstPtrBegin, ArgsBase, ArgSize, IsLast, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false, IsHostPtr, /*MustContain=*/true); void *TgtPtrBegin = TPR.TargetPointer; if (!TgtPtrBegin) { @@ -1298,8 +1299,8 @@ void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation(); TargetPointerResultTy TPR = Device.getTgtPtrBegin( - HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false, - /*UseHoldRefCount=*/false, IsHostPtr); + HstPtrVal, HstPtrBegin, ArgSizes[I], IsLast, + /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false, IsHostPtr); PointerTgtPtrBegin = TPR.TargetPointer; if (!PointerTgtPtrBegin) { DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", @@ -1355,7 +1356,7 @@ } else { if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) HstPtrBase = *reinterpret_cast(HstPtrBase); - TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast, + TPR = Device.getTgtPtrBegin(HstPtrBegin, HstPtrBase, ArgSizes[I], IsLast, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false, IsHostPtr); TgtPtrBegin = TPR.TargetPointer; diff --git a/openmp/libomptarget/test/mapping/array_section_implicit_capture.c b/openmp/libomptarget/test/mapping/array_section_implicit_capture.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/array_section_implicit_capture.c @@ -0,0 +1,59 @@ +// RUN: %libomptarget-compile-generic -fopenmp-version=51 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +#include +#include + +#define N 1024 +#define FROM 64 +#define LENGTH 128 + +int main() +{ + float *A = (float *) malloc(N * sizeof(float)); + float *B = (float *) malloc(N * sizeof(float)); + float *C = (float *) malloc(N * sizeof(float)); + + for (int i = 0; i < N; i++) { + C[i] = 0.0; + } + + for (int i = 0; i < N; i++) { + A[i] = i; + B[i] = 2*i; + } + + #pragma omp target enter data map(to: A[FROM:LENGTH], B[FROM:LENGTH]) + #pragma omp target enter data map(alloc: C[FROM:LENGTH]) + + // A, B and C have been mapped starting at index FROM, but inside the kernel + // they are captured implicitly so the library must look them up using their + // base address. + #pragma omp target + { + for (int i = FROM; i < FROM+LENGTH; i++) { + C[i] = A[i] + B[i]; + } + } + + #pragma omp target exit data map(from: C[FROM:LENGTH]) + #pragma omp target exit data map(delete: A[FROM:LENGTH], B[FROM:LENGTH]) + + int errors = 0; + for (int i = FROM; i < FROM+LENGTH; i++) + if (C[i] != A[i] + B[i]) + ++errors; + + // CHECK: Success + if (errors) + fprintf(stderr, "Failure\n"); + else + fprintf(stderr, "Success\n"); + + free(A); + free(B); + free(C); + + return 0; +} diff --git a/openmp/libomptarget/test/mapping/array_section_use_device_ptr.c b/openmp/libomptarget/test/mapping/array_section_use_device_ptr.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/array_section_use_device_ptr.c @@ -0,0 +1,38 @@ +// RUN: %libomptarget-compile-generic -fopenmp-version=51 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +#include +#include + +#define N 1024 +#define FROM 64 +#define LENGTH 128 + +int main() +{ + float *A = (float *) malloc(N * sizeof(float)); + + #pragma omp target enter data map(to: A[FROM:LENGTH]) + + // A, has been mapped starting at index FROM, but inside the use_device_ptr + // clause it is captured by base so the library must look it up using the + // base address. + + float *A_dev = NULL; + #pragma omp target data use_device_ptr(A) + { + A_dev = A; + } + #pragma omp target exit data map(delete: A[FROM:LENGTH]) + + // CHECK: Success + if (A_dev == NULL || A_dev == A) + fprintf(stderr, "Failure\n"); + else + fprintf(stderr, "Success\n"); + + free(A); + + return 0; +} diff --git a/openmp/libomptarget/test/mapping/two_chunks_via_ptrs.c b/openmp/libomptarget/test/mapping/two_chunks_via_ptrs.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/two_chunks_via_ptrs.c @@ -0,0 +1,21 @@ +// RUN: %libomptarget-compile-generic +// RUN: %libomptarget-run-fail-generic 2>&1 \ +// RUN: | %fcheck-generic + +// CHECK: Libomptarget message: Found record of existing mapping with the requested base address 0x{{.*}} but disjoint mapped data, mapping of two distinct chunks of the same object is not allowed. +// CHECK: Libomptarget error: Call to getTargetPointer returned null pointer (device failure or illegal mapping). +// CHECK: Libomptarget fatal error 1: failure of target construct while offloading is mandatory + +#include +#include + +int main() +{ + int *mem = (int *) malloc(32 * sizeof(int)); + int *a = mem; + int *b = mem; + #pragma omp target data map(from: a[0:10], b[20:10]) + { } + + return 0; +}