Index: openmp/libomptarget/include/device.h =================================================================== --- openmp/libomptarget/include/device.h +++ openmp/libomptarget/include/device.h @@ -68,7 +68,8 @@ const uintptr_t HstPtrEnd; // non-inclusive. const map_var_info_t HstPtrName; // Optional source name of mapped variable. - const uintptr_t TgtPtrBegin; // target info. + const uintptr_t TgtAllocBegin; // allocated target memory + const uintptr_t TgtPtrBegin; // mapped target memory = TgtAllocBegin + padding private: static const uint64_t INFRefCount = ~(uint64_t)0; @@ -120,16 +121,18 @@ const std::unique_ptr States; public: - HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB, + HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, + uintptr_t TgtAllocBegin, uintptr_t TgtPtrBegin, bool UseHoldRefCount, map_var_info_t Name = nullptr, bool IsINF = false) : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name), - TgtPtrBegin(TB), States(std::make_unique(UseHoldRefCount ? 0 - : IsINF ? INFRefCount - : 1, - !UseHoldRefCount ? 0 - : IsINF ? INFRefCount - : 1)) {} + TgtAllocBegin(TgtAllocBegin), TgtPtrBegin(TgtPtrBegin), + States(std::make_unique(UseHoldRefCount ? 0 + : IsINF ? INFRefCount + : 1, + !UseHoldRefCount ? 0 + : IsINF ? INFRefCount + : 1)) {} /// Get the total reference count. This is smarter than just getDynRefCount() /// + getHoldRefCount() because it handles the case where at least one is @@ -446,8 +449,8 @@ /// - Data transfer issue fails. TargetPointerResultTy getTargetPointer( HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, - int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, - bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, + int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, + bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr, bool ReleaseHDTTMap = true); Index: openmp/libomptarget/src/device.cpp =================================================================== --- openmp/libomptarget/src/device.cpp +++ openmp/libomptarget/src/device.cpp @@ -90,6 +90,7 @@ /*HstPtrBase=*/(uintptr_t)HstPtrBegin, /*HstPtrBegin=*/(uintptr_t)HstPtrBegin, /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size, + /*TgtAllocBegin=*/(uintptr_t)TgtPtrBegin, /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin, /*UseHoldRefCount=*/false, /*Name=*/nullptr, /*IsRefCountINF=*/true)) @@ -216,10 +217,10 @@ TargetPointerResultTy DeviceTy::getTargetPointer( HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, - int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, - bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, - HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { + int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, + bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, + AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR); LR.TPR.Flags.IsPresent = true; @@ -297,24 +298,28 @@ } else if (Size) { // If it is not contained and Size > 0, we should create a new entry for it. LR.TPR.Flags.IsNewEntry = true; - uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin); + uintptr_t TgtAllocBegin = + (uintptr_t)allocData(TgtPadding + Size, HstPtrBegin); + uintptr_t TgtPtrBegin = TgtAllocBegin + TgtPadding; // Release the mapping table lock only after the entry is locked by // attaching it to TPR. LR.TPR.setEntry(HDTTMap ->emplace(new HostDataToTargetTy( (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, - (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier, - HstPtrName)) + (uintptr_t)HstPtrBegin + Size, TgtAllocBegin, + TgtPtrBegin, HasHoldModifier, HstPtrName)) .first->HDTT); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Creating new map entry with HstPtrBase=" DPxMOD - ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, " - "DynRefCount=%s, HoldRefCount=%s, Name=%s\n", - DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size, + ", HstPtrBegin=" DPxMOD ", TgtAllocBegin=" DPxMOD + ", TgtPtrBegin=" DPxMOD + ", Size=%ld, DynRefCount=%s, HoldRefCount=%s, Name=%s\n", + DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(TgtAllocBegin), + DPxPTR(TgtPtrBegin), Size, LR.TPR.getEntry()->dynRefCountToStr().c_str(), LR.TPR.getEntry()->holdRefCountToStr().c_str(), (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); - LR.TPR.TargetPointer = (void *)Ptr; + LR.TPR.TargetPointer = (void *)TgtPtrBegin; // Notify the plugin about the new mapping. if (notifyDataMapped(HstPtrBegin, Size)) @@ -490,8 +495,9 @@ int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) { assert(Entry && "Trying to deallocate a null entry."); - DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", - DPxPTR(Entry->TgtPtrBegin), Size); + DP("Deleting tgt data " DPxMOD " of size %" PRId64 " by freeing allocation " + "starting at " DPxMOD "\n", + DPxPTR(Entry->TgtPtrBegin), Size, DPxPTR(Entry->TgtAllocBegin)); void *Event = Entry->getEvent(); if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) { @@ -499,7 +505,7 @@ return OFFLOAD_FAIL; } - int Ret = deleteData((void *)Entry->TgtPtrBegin); + int Ret = deleteData((void *)Entry->TgtAllocBegin); // Notify the plugin about the unmapped memory. Ret |= notifyDataUnmapped((void *)Entry->HstPtrBegin); @@ -551,8 +557,8 @@ return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); } -int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) { - return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind); +int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { + return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind); } static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin, Index: openmp/libomptarget/src/omptarget.cpp =================================================================== --- openmp/libomptarget/src/omptarget.cpp +++ openmp/libomptarget/src/omptarget.cpp @@ -208,6 +208,7 @@ (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/, (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/, + (uintptr_t)CurrDeviceEntry->addr /*TgtAllocBegin*/, (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/, false /*UseHoldRefCount*/, CurrHostEntry->name, true /*IsRefCountINF*/)); @@ -602,18 +603,16 @@ // Adjust for proper alignment if this is a combined entry (for structs). // Look at the next argument - if that is MEMBER_OF this one, then this one // is a combined entry. - int64_t Padding = 0; + int64_t TgtPadding = 0; const int NextI = I + 1; if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum && getParentIndex(ArgTypes[NextI]) == I) { int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase); - Padding = (int64_t)HstPtrBegin % Alignment; - if (Padding) { + TgtPadding = (int64_t)HstPtrBegin % Alignment; + if (TgtPadding) { DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD "\n", - Padding, DPxPTR(HstPtrBegin)); - HstPtrBegin = (char *)HstPtrBegin - Padding; - DataSize += Padding; + TgtPadding, DPxPTR(HstPtrBegin)); } } @@ -653,7 +652,7 @@ // PTR_AND_OBJ entry is handled below, and so the allocation might fail // when HasPresentModifier. PointerTpr = Device.getTargetPointer( - HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *), + HDTTMap, HstPtrBase, HstPtrBase, /*TgtPadding=*/0, sizeof(void *), /*HstPtrName=*/nullptr, /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo, @@ -683,8 +682,8 @@ const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; // Note that HDTTMap will be released in getTargetPointer. auto TPR = Device.getTargetPointer( - HDTTMap, HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, - HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, + HDTTMap, HstPtrBegin, HstPtrBase, TgtPadding, DataSize, HstPtrName, + HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry()); void *TgtPtrBegin = TPR.TargetPointer; IsHostPtr = TPR.Flags.IsHostPointer; @@ -890,25 +889,7 @@ } void *HstPtrBegin = Args[I]; - void *HstPtrBase = ArgBases[I]; int64_t DataSize = ArgSizes[I]; - // Adjust for proper alignment if this is a combined entry (for structs). - // Look at the next argument - if that is MEMBER_OF this one, then this one - // is a combined entry. - const int NextI = I + 1; - if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum && - getParentIndex(ArgTypes[NextI]) == I) { - int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase); - int64_t Padding = (int64_t)HstPtrBegin % Alignment; - if (Padding) { - DP("Using a Padding of %" PRId64 " bytes for begin address " DPxMOD - "\n", - Padding, DPxPTR(HstPtrBegin)); - HstPtrBegin = (char *)HstPtrBegin - Padding; - DataSize += Padding; - } - } - bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT; bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) && Index: openmp/libomptarget/test/mapping/padding_not_mapped.c =================================================================== --- /dev/null +++ openmp/libomptarget/test/mapping/padding_not_mapped.c @@ -0,0 +1,58 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +// The host memory layout for the following program looks like this: +// +// | 16 bytes | 8 bytes | 4 bytes | 4 bytes | 8 bytes | +// | s.x | s.y | s.z | +// `------------------------------' +// ^ ^ `-------------------' +// | | ^ +// | | | +// | | `-- libomptarget's device padding for map(s.y,s.z) +// | | maps to 4 bytes or all 12 bytes of this range, +// | | depending on whether s is only 8-byte or at least +// | | 16-byte aligned on the host +// | | +// | `---------------- map(s.x) maps to host memory here that overlaps +// | but is not subsumed by the host memory that +// | map(s.y,s.z) maps to, causing an array extension +// | error no matter which is mapped first +// | +// `------------------ s is at least 8-byte aligned on the host due to s.z +// +// The fix for the array extension error is that libomptarget must not treat its +// padding for map(s.y,s.z) as mapped to host memory. + +#include + +int main() { + struct S { int x[7]; int y; double z; } s; + for (int i = 0; i < 7; ++i) + s.x[i] = i; + s.y = 7; + s.z = 8; + fprintf(stderr, "&s = %p\n", &s); + fprintf(stderr, "&s.x = %p\n", &s.x); + fprintf(stderr, "&s.y = %p\n", &s.y); + fprintf(stderr, "&s.z = %p\n", &s.z); + #pragma omp target data map(s.y, s.z) + #pragma omp target data map(s.x) + ; + #pragma omp target data map(s.x) + #pragma omp target data map(s.y, s.z) + ; + // CHECK: s.x[0] = 0 + // CHECK: s.x[1] = 1 + // CHECK: s.x[2] = 2 + // CHECK: s.x[3] = 3 + // CHECK: s.x[4] = 4 + // CHECK: s.x[5] = 5 + // CHECK: s.x[6] = 6 + // CHECK: s.y = 7 + // CHECK: s.z = 8.0 + for (int i = 0; i < 7; ++i) + printf("s.x[%d] = %d\n", i, s.x[i]); + printf("s.y = %d\n", s.y); + printf("s.z = %.1f\n", s.z); + return 0; +} Index: openmp/libomptarget/test/mapping/power_of_two_alignment.c =================================================================== --- openmp/libomptarget/test/mapping/power_of_two_alignment.c +++ openmp/libomptarget/test/mapping/power_of_two_alignment.c @@ -35,7 +35,9 @@ // padding for s, libomptarget reported an array extension error. collidePost // is never fully contained within that padding (which would avoid the extension // error) because collidePost is 16 bytes while the padding is always less than -// 16 bytes due to the modulo operations. +// 16 bytes due to the modulo operations. (Later, libomptarget was changed not +// to consider padding to be mapped to the host, so it cannot be involved in +// array extension errors.) #include #include