diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -27,6 +27,7 @@ #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" @@ -4661,8 +4662,9 @@ /// subscript it may not have any associated declaration. In that case the /// associated declaration is set to nullptr. class MappableComponent { - /// Expression associated with the component. - Expr *AssociatedExpression = nullptr; + /// Pair of Expression and Non-contiguous pair associated with the + /// component. + llvm::PointerIntPair AssociatedExpressionNonContiguousPr; /// Declaration associated with the declaration. If the component does /// not have a declaration (e.g. array subscripts or section), this is set @@ -4671,15 +4673,23 @@ public: explicit MappableComponent() = default; - explicit MappableComponent(Expr *AssociatedExpression, + explicit MappableComponent(llvm::PointerIntPair + AssociatedExpressionNonContiguousPr, ValueDecl *AssociatedDeclaration) - : AssociatedExpression(AssociatedExpression), + : AssociatedExpressionNonContiguousPr( + AssociatedExpressionNonContiguousPr), AssociatedDeclaration( AssociatedDeclaration ? cast(AssociatedDeclaration->getCanonicalDecl()) : nullptr) {} - Expr *getAssociatedExpression() const { return AssociatedExpression; } + Expr *getAssociatedExpression() const { + return AssociatedExpressionNonContiguousPr.getPointer(); + } + + bool getNonContiguous() const { + return AssociatedExpressionNonContiguousPr.getInt(); + } ValueDecl *getAssociatedDeclaration() const { return AssociatedDeclaration; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7820,6 +7820,10 @@ /// Close is a hint to the runtime to allocate memory close to /// the target device. OMP_MAP_CLOSE = 0x400, + /// Signal that the runtime library should use args as an array of + /// descriptor_dim pointers and use args_size as dims. Used when we have + /// non-contiguous list items in target update directive + OMP_MAP_DESCRIPTOR = 0x800, /// The 16 MSBs of the flags indicate whether the entry is member of some /// struct/class. OMP_MAP_MEMBER_OF = 0xffff000000000000, @@ -7855,6 +7859,8 @@ using MapBaseValuesArrayTy = SmallVector; using MapValuesArrayTy = SmallVector; using MapFlagsArrayTy = SmallVector; + using MapDimArrayTy = SmallVector; + using MapNonContiguousArrayTy = SmallVector; /// Map between a struct and the its lowest & highest elements which have been /// mapped. @@ -7881,8 +7887,8 @@ MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - bool ReturnDevicePointer, bool IsImplicit) + ArrayRef MapModifiers, bool ReturnDevicePointer, + bool IsImplicit) : Components(Components), MapType(MapType), MapModifiers(MapModifiers), ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} }; @@ -7998,9 +8004,11 @@ /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - OpenMPOffloadMappingFlags getMapTypeBits( - OpenMPMapClauseKind MapType, ArrayRef MapModifiers, - bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { + OpenMPOffloadMappingFlags + getMapTypeBits(OpenMPMapClauseKind MapType, + ArrayRef MapModifiers, bool IsImplicit, + bool AddPtrFlag, bool AddIsTargetParamFlag, + bool IsNonContiguous) const { OpenMPOffloadMappingFlags Bits = IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; switch (MapType) { @@ -8036,6 +8044,8 @@ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) != MapModifiers.end()) Bits |= OMP_MAP_CLOSE; + if (IsNonContiguous) + Bits |= OMP_MAP_DESCRIPTOR; return Bits; } @@ -8083,15 +8093,15 @@ /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. void generateInfoForComponentList( - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, + OpenMPMapClauseKind MapType, ArrayRef MapModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, MapDimArrayTy &Dims, StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, bool IsImplicit, ArrayRef - OverlappedElements = llvm::None) const { + OverlappedElements = llvm::None, + bool IsNonContiguous = false) const { // The following summarizes what has to be generated for each map and the // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags @@ -8338,6 +8348,9 @@ // whether we are dealing with a member of a declared struct. const MemberExpr *EncounteredME = nullptr; + // Track for the total number of dimension. + uint64_t DimSize = 0; + for (; I != CE; ++I) { // If the current component is member of a struct (parent struct) mark it. if (!EncounteredME) { @@ -8356,8 +8369,11 @@ // becomes the base address for the following components. // A final array section, is one whose length can't be proved to be one. + // If the map item is non-contiguous then we don't treat any array section + // as final array section. bool IsFinalArraySection = - isFinalArraySectionExpression(I->getAssociatedExpression()); + isFinalArraySectionExpression(I->getAssociatedExpression()) && + (!IsNonContiguous); // Get information on whether the element is a pointer. Have to do a // special treatment for array sections given that they are built-in @@ -8374,7 +8390,10 @@ .getCanonicalType() ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); - bool IsNonDerefPointer = IsPointer && !UO && !BO; + bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; + + if (OASE) + DimSize++; if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { // If this is not the last component, we expect the pointer to be @@ -8430,7 +8449,8 @@ OMP_MAP_MEMBER_OF | getMapTypeBits(MapType, MapModifiers, IsImplicit, /*AddPtrFlag=*/false, - /*AddIsTargetParamFlag=*/false); + /*AddIsTargetParamFlag=*/false, + /*IsNonContiguous=*/IsNonContiguous); LB = BP; llvm::Value *Size = nullptr; // Do bitcopy of all non-overlapped structure elements. @@ -8454,6 +8474,7 @@ Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); Types.push_back(Flags); + Dims.push_back(IsNonContiguous ? DimSize : 0); LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } BasePointers.push_back(BP.getPointer()); @@ -8465,6 +8486,7 @@ Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); Types.push_back(Flags); + Dims.push_back(IsNonContiguous ? DimSize : 0); break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); @@ -8473,15 +8495,17 @@ Pointers.push_back(LB.getPointer()); Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); + Dims.push_back(IsNonContiguous ? DimSize : 0); // We need to add a pointer flag for each map that comes from the // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). - OpenMPOffloadMappingFlags Flags = getMapTypeBits( - MapType, MapModifiers, IsImplicit, - !IsExpressionFirstInfo || RequiresReference, - IsCaptureFirstInfo && !RequiresReference); + OpenMPOffloadMappingFlags Flags = + getMapTypeBits(MapType, MapModifiers, IsImplicit, + !IsExpressionFirstInfo || RequiresReference, + IsCaptureFirstInfo && !RequiresReference, + /*IsNonContiguous=*/IsNonContiguous); if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, @@ -8536,6 +8560,158 @@ } } + /// Generate the base pointers, section pointers, sizes , map type bits, + /// dimension size, offset, count, and strides for the provided map type, map + /// modifier, and expression components. \a IsFirstComponent should be set to + /// true if the provided set of components is the first associated with a + /// capture. + void generateInfoForTargetDataComponentList( + OpenMPMapClauseKind MapType, ArrayRef MapModifiers, + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, MapDimArrayTy &Dims, + MapNonContiguousArrayTy &Offsets, MapNonContiguousArrayTy &Counts, + MapNonContiguousArrayTy &Strides, StructRangeInfoTy &PartialStruct, + bool IsFirstComponentList, bool IsImplicit, + ArrayRef + OverlappedElements = llvm::None) const { + + generateInfoForComponentList(MapType, MapModifiers, Components, + BasePointers, Pointers, Sizes, Types, Dims, + PartialStruct, IsFirstComponentList, + IsImplicit, OverlappedElements, true); + + const ASTContext &Context = CGF.getContext(); + + MapValuesArrayTy CurOffsets; + MapValuesArrayTy CurCounts; + MapValuesArrayTy CurStrides; + llvm::Value *CurStride = nullptr; + + // Collect Size information for each dimension and get the element size as + // the first Stride. For example, for `int arr[10][10]`, the DimSizes + // should be [10, 10] and the first stride is 4 btyes. + SmallVector DimSizes; + + for (auto CI = Components.begin(), CE = Components.end(); CI != CE; ++CI) { + const Expr *AssocExpr = CI->getAssociatedExpression(); + const auto *AE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); + if (AE || OASE) { + QualType Ty; + if (OASE) + Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + else + Ty = AE->getType(); + auto *CAT = Context.getAsConstantArrayType(Ty); + auto *VAT = Context.getAsVariableArrayType(Ty); + // Get element size if CurStrides is empty. + if (CurStrides.empty()) { + const Type *ElementType = nullptr; + uint64_t ElementTypeSize; + if (CAT) { + ElementType = CAT->getElementType().getTypePtr(); + ElementTypeSize = + Context.getTypeSizeInChars(ElementType).getQuantity(); + } else if (VAT) { + ElementType = VAT->getElementType().getTypePtr(); + ElementTypeSize = + Context.getTypeSizeInChars(ElementType).getQuantity(); + } + if (ElementType) + CurStrides.push_back( + llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); + } + // Get dimension value. + llvm::Value *SizeV = nullptr; + if (CAT) { + llvm::APInt Size = CAT->getSize(); + SizeV = llvm::ConstantInt::get(CGF.SizeTy, Size); + } else if (VAT) { + const Expr *Size = VAT->getSizeExpr(); + SizeV = CGF.EmitScalarExpr(Size); + } + assert((VAT || CAT || CI == Components.begin()) && + "Should be either ConstantArray or VariableArra if not the " + "first Component"); + // We don't need the last dimension size for computing stride. + if (SizeV && DimSizes.size() < Components.size() - 1) + DimSizes.push_back(CGF.Builder.CreateIntCast(SizeV, CGF.Int64Ty, + /*IsSigned=*/false)); + } + } + + // Scan the components from the base to the complete expression. + auto CI = Components.begin(); + auto CE = Components.end(); + auto I = CI; + auto DI = DimSizes.begin(); + + // Collect info for non-contiguous. Notice that offset, count, and stride + // are only meaningful for array-section, so we insert a null for anything + // other than array-section. + // Also, the size of offset, count, and stride are not the same as pointers, + // base_pointers, sizes, or dims. Instead, the size of offset, count, and + // stride are the same as the number of non-contiguous declaration in target + // update to/from clause. + for (; I != CE; ++I) { + const Expr *AssocExpr = I->getAssociatedExpression(); + const auto *AE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); + + if (OASE || AE) { + // Offset + const Expr *OffsetExpr = nullptr; + if (OASE) + OffsetExpr = OASE->getLowerBound(); + else + OffsetExpr = AE->getIdx(); + llvm::Value *Offset = nullptr; + if (!OffsetExpr) { + // If offset is absent, then we just set it to zero. + Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); + } else { + Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), + CGF.Int64Ty, + /*isSigned=*/false); + } + CurOffsets.push_back(Offset); + + // Count + const Expr *CountExpr = nullptr; + if (OASE) + CountExpr = OASE->getLength(); + llvm::Value *Count = nullptr; + if (!CountExpr) { + // If length is absent then we calculate it as (Total length - + // lower_bound) + Count = CGF.Builder.CreateNUWSub(*DI, Offset); + } else { + Count = CGF.EmitScalarExpr(CountExpr); + } + Count = + CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); + CurCounts.push_back(Count); + + // Stride = previous stride * previous dimension size + // Take `int arr[5][10]` and `arr[0:2][0:2]` as an example: + // Dimension 1 Dimension 0 + // Offset 0 0 + // Count 2 2 + // Stride 40 bytes (4x10) 4 bytes (int) + if (DI != DimSizes.end()) { + CurStride = + CGF.Builder.CreateNUWMul(CurStrides.back(), *DI++); + CurStrides.push_back(CurStride); + } + } + } + + Offsets.push_back(CurOffsets); + Counts.push_back(CurCounts); + Strides.push_back(CurStrides); + } + /// Return the adjusted map modifiers if the declaration a capture refers to /// appears in a first-private clause. This is expected to be used only with /// directives that start with 'target'. @@ -8714,7 +8890,10 @@ /// index where it occurs is appended to the device pointers info array. void generateAllInfo(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + MapFlagsArrayTy &Types, MapDimArrayTy &Dims, + MapNonContiguousArrayTy &Offsets, + MapNonContiguousArrayTy &Counts, + MapNonContiguousArrayTy &Strides) const { // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. @@ -8824,6 +9003,10 @@ MapValuesArrayTy CurPointers; MapValuesArrayTy CurSizes; MapFlagsArrayTy CurTypes; + MapDimArrayTy CurDims; + MapNonContiguousArrayTy CurOffsets; + MapNonContiguousArrayTy CurCounts; + MapNonContiguousArrayTy CurStrides; StructRangeInfoTy PartialStruct; for (const MapInfo &L : M.second) { @@ -8832,10 +9015,18 @@ // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, - CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + if (L.Components.back().getNonContiguous()) { + generateInfoForTargetDataComponentList( + L.MapType, L.MapModifiers, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, CurDims, CurOffsets, CurCounts, + CurStrides, PartialStruct, IsFirstComponentList, L.IsImplicit); + } else { + // Indicate that we do not do the special non-contiguous codegen + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, CurDims, PartialStruct, + IsFirstComponentList, L.IsImplicit); + } // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8875,15 +9066,24 @@ // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) + if (PartialStruct.Base.isValid()) { + // Make sure Dims have the same size as BP, P, Sizes, and Types. + // Put 0 here to make sure that `emitTargetDataOffloadingArrays` use it + // to skip this one. + CurDims.push_back(0); emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, PartialStruct); + } // We need to append the results of this capture to what we already have. BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); Pointers.append(CurPointers.begin(), CurPointers.end()); Sizes.append(CurSizes.begin(), CurSizes.end()); Types.append(CurTypes.begin(), CurTypes.end()); + Dims.append(CurDims.begin(), CurDims.end()); + Offsets.append(CurOffsets.begin(), CurOffsets.end()); + Counts.append(CurCounts.begin(), CurCounts.end()); + Strides.append(CurStrides.begin(), CurStrides.end()); } } @@ -8933,6 +9133,7 @@ MapValuesArrayTy CurPointers; MapValuesArrayTy CurSizes; MapFlagsArrayTy CurTypes; + MapDimArrayTy CurDims; StructRangeInfoTy PartialStruct; for (const MapInfo &L : M.second) { @@ -8940,7 +9141,7 @@ "Not expecting declaration with no component lists."); generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, + CurTypes, CurDims, PartialStruct, IsFirstComponentList, L.IsImplicit); IsFirstComponentList = false; } @@ -9059,6 +9260,7 @@ MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + MapDimArrayTy &Dims, StructRangeInfoTy &PartialStruct) const { assert(!Cap->capturesVariableArrayType() && "Not expecting to generate map info for a variable array type!"); @@ -9208,7 +9410,7 @@ OverlappedComponents = Pair.getSecond(); bool IsFirstComponentList = true; generateInfoForComponentList(MapType, MapModifiers, Components, - BasePointers, Pointers, Sizes, Types, + BasePointers, Pointers, Sizes, Types, Dims, PartialStruct, IsFirstComponentList, IsImplicit, OverlappedComponents); } @@ -9222,10 +9424,9 @@ std::tie(Components, MapType, MapModifiers, IsImplicit) = L; auto It = OverlappedData.find(&L); if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, Components, - BasePointers, Pointers, Sizes, Types, - PartialStruct, IsFirstComponentList, - IsImplicit); + generateInfoForComponentList( + MapType, MapModifiers, Components, BasePointers, Pointers, Sizes, + Types, Dims, PartialStruct, IsFirstComponentList, IsImplicit); IsFirstComponentList = false; } } @@ -9235,7 +9436,8 @@ void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + MapFlagsArrayTy &Types, + MapDimArrayTy &Dims) const { assert(CurDir.is() && "Expect a executable directive"); const auto *CurExecDir = CurDir.get(); @@ -9256,7 +9458,7 @@ StructRangeInfoTy PartialStruct; generateInfoForComponentList( C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, - Pointers, Sizes, Types, PartialStruct, + Pointers, Sizes, Types, Dims, PartialStruct, /*IsFirstComponentList=*/true, C->isImplicit()); assert(!PartialStruct.Base.isValid() && "No partial structs for declare target link expected."); @@ -9350,16 +9552,15 @@ }; } // anonymous namespace -/// Emit the arrays used to pass the captures and map information to the -/// offloading runtime library. If there is no map or capture information, -/// return nullptr by reference. static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, - CGOpenMPRuntime::TargetDataInfo &Info) { + MappableExprsHandler::MapDimArrayTy &Dims, + CGOpenMPRuntime::TargetDataInfo &Info, + bool IsNonContiguous = false) { CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGF.getContext(); @@ -9402,8 +9603,14 @@ // We expect all the sizes to be constant, so we collect them to create // a constant array. SmallVector ConstSizes; - for (llvm::Value *S : Sizes) - ConstSizes.push_back(cast(S)); + for (unsigned I = 0, E = Sizes.size(); I < E; ++I) { + if (IsNonContiguous && + (MapTypes[I] & MappableExprsHandler::OMP_MAP_DESCRIPTOR)) { + ConstSizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, Dims[I])); + } else { + ConstSizes.push_back(cast(Sizes[I])); + } + } auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); @@ -9469,6 +9676,89 @@ } } +/// Emit the arrays used to pass the captures and map information to the +/// offloading runtime library. If there is no map or capture information, +/// return nullptr by reference. +static void emitTargetDataOffloadingArrays( + CodeGenFunction &CGF, + MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, + MappableExprsHandler::MapValuesArrayTy &Pointers, + MappableExprsHandler::MapValuesArrayTy &Sizes, + MappableExprsHandler::MapFlagsArrayTy &MapTypes, + MappableExprsHandler::MapDimArrayTy &Dims, + MappableExprsHandler::MapNonContiguousArrayTy &Offsets, + MappableExprsHandler::MapNonContiguousArrayTy &Counts, + MappableExprsHandler::MapNonContiguousArrayTy &Strides, + CGOpenMPRuntime::TargetDataInfo &Info) { + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Dims, Info, + true); + + if (Offsets.empty()) + return; + + ASTContext &C = CGF.getContext(); + CodeGenModule &CGM = CGF.CGM; + + // Build an array of struct descriptor_dim and then assign it to offload_args. + if (Info.NumberOfPtrs) { + // Build struct descriptor_dim { + // int64_t offset; + // int64_t count; + // int64_t stride + // }; + QualType Int64Ty = + C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + RecordDecl *RD; + RD = C.buildImplicitRecord("descriptor_dim"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + RD->completeDefinition(); + QualType DimTy = C.getRecordType(RD); + + enum { OffsetFD = 0, CountFD, StrideFD }; + // The reason we need two index variable here is because the size of "Dims" + // is the same as the size of Components, however, the size of offset, count + // , and stride is equal to the size of base declaration that is + // non-contiguous. + for (unsigned I = 0, L = 0, E = Offsets.size(); I < E; ++I) { + if (Dims[I] == 0) + continue; + llvm::APInt Size(/*numBits=*/32, Dims[I]); + QualType ArrayTy = + C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); + Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); + for (unsigned II = 0, EE = Dims[I]; II < EE; ++II) { + unsigned RevIdx = EE - II - 1; + LValue DimsLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); + // Offset + LValue OffsetLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), OffsetFD)); + CGF.EmitStoreOfScalar(Offsets[L][RevIdx], OffsetLVal); + // Count + LValue CountLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), CountFD)); + CGF.EmitStoreOfScalar(Counts[L][RevIdx], CountLVal); + // Stride + LValue StrideLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(Strides[L][RevIdx], StrideLVal); + } + // args[I] = &dims + Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DimsAddr, CGM.Int8PtrTy); + llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.PointersArray, 0, I); + Address PAddr(P, C.getTypeAlignInChars(C.VoidPtrTy)); + CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); + ++L; + } + } +} + /// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. static void emitOffloadingArraysArgument( @@ -10142,6 +10432,7 @@ MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapDimArrayTy Dims; // Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); @@ -10156,6 +10447,7 @@ MappableExprsHandler::MapValuesArrayTy CurPointers; MappableExprsHandler::MapValuesArrayTy CurSizes; MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + MappableExprsHandler::MapDimArrayTy CurDims; MappableExprsHandler::StructRangeInfoTy PartialStruct; // VLA sizes are passed to the outlined region by copy and do not have map @@ -10173,7 +10465,8 @@ // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, - CurSizes, CurMapTypes, PartialStruct); + CurSizes, CurMapTypes, CurDims, + PartialStruct); if (CurBasePointers.empty()) MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, CurPointers, CurSizes, CurMapTypes); @@ -10210,11 +10503,12 @@ // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, - MapTypes); + MapTypes, Dims); TargetDataInfo Info; // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Dims, + Info); emitOffloadingArraysArgument(CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, Info); @@ -10812,13 +11106,19 @@ MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapDimArrayTy Dims; + MappableExprsHandler::MapNonContiguousArrayTy Offsets; + MappableExprsHandler::MapNonContiguousArrayTy Counts; + MappableExprsHandler::MapNonContiguousArrayTy Strides; // Get map clause information. MappableExprsHandler MCHandler(D, CGF); - MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes, Dims, + Offsets, Counts, Strides); // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitTargetDataOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, + Dims, Offsets, Counts, Strides, Info); llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr; @@ -11048,14 +11348,20 @@ MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapDimArrayTy Dims; + MappableExprsHandler::MapNonContiguousArrayTy Offsets; + MappableExprsHandler::MapNonContiguousArrayTy Counts; + MappableExprsHandler::MapNonContiguousArrayTy Strides; // Get map clause information. MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes, Dims, + Offsets, Counts, Strides); TargetDataInfo Info; // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitTargetDataOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, + Dims, Offsets, Counts, Strides, Info); emitOffloadingArraysArgument(CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, Info); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -47,7 +47,7 @@ static const Expr *checkMapClauseExpressionBase( Sema &SemaRef, Expr *E, OMPClauseMappableExprCommon::MappableExprComponentList &CurComponents, - OpenMPClauseKind CKind, bool NoDiagnose); + OpenMPClauseKind CKind, OpenMPDirectiveKind DKind, bool NoDiagnose); namespace { /// Default data sharing attributes, which can be applied to directive. @@ -3528,6 +3528,7 @@ if (isOpenMPTargetExecutionDirective(DKind)) { OMPClauseMappableExprCommon::MappableExprComponentList CurComponents; if (!checkMapClauseExpressionBase(SemaRef, E, CurComponents, OMPC_map, + Stack->getCurrentDirective(), /*NoDiagnose=*/true)) return; const auto *VD = cast( @@ -16350,11 +16351,14 @@ class MapBaseChecker final : public StmtVisitor { Sema &SemaRef; OpenMPClauseKind CKind = OMPC_unknown; + OpenMPDirectiveKind DKind = OMPD_unknown; OMPClauseMappableExprCommon::MappableExprComponentList &Components; + bool IsNonContiguous; bool NoDiagnose = false; const Expr *RelevantExpr = nullptr; bool AllowUnitySizeArraySection = true; bool AllowWholeSizeArraySection = true; + bool AllowAnotherPtr = true; SourceLocation ELoc; SourceRange ERange; @@ -16378,8 +16382,9 @@ } assert(!RelevantExpr && "RelevantExpr is expected to be nullptr"); RelevantExpr = DRE; + llvm::PointerIntPair Pr(DRE, IsNonContiguous); // Record the component. - Components.emplace_back(DRE, DRE->getDecl()); + Components.emplace_back(Pr, DRE->getDecl()); return true; } @@ -16451,7 +16456,8 @@ AllowWholeSizeArraySection = false; // Record the component. - Components.emplace_back(ME, FD); + llvm::PointerIntPair Pr(ME, IsNonContiguous); + Components.emplace_back(Pr, FD); return RelevantExpr || Visit(E); } @@ -16489,7 +16495,8 @@ } // Record the component - we don't have any declaration associated. - Components.emplace_back(AE, nullptr); + llvm::PointerIntPair Pr(AE, /*IsNonContiguous=*/false); + Components.emplace_back(Pr, nullptr); return RelevantExpr || Visit(E); } @@ -16528,6 +16535,13 @@ // pointer. Otherwise, only unitary sections are accepted. if (NotWhole || IsPointer) AllowWholeSizeArraySection = false; + } else if (DKind == OMPD_target_update && + SemaRef.getLangOpts().OpenMP >= 50) { + if (IsPointer && !AllowAnotherPtr) { + SemaRef.Diag(ELoc, diag::err_omp_section_length_undefined) << true; + } else { + IsNonContiguous = true; + } } else if (AllowUnitySizeArraySection && NotUnity) { // A unity or whole array section is not allowed and that is not // compatible with the properties of the current array section. @@ -16537,6 +16551,9 @@ return false; } + if (IsPointer) + AllowAnotherPtr = false; + if (const auto *TE = dyn_cast(E)) { Expr::EvalResult ResultR; Expr::EvalResult ResultL; @@ -16562,14 +16579,16 @@ } // Record the component - we don't have any declaration associated. - Components.emplace_back(OASE, nullptr); + llvm::PointerIntPair Pr(OASE, /*IsNonContiguous=*/false); + Components.emplace_back(Pr, nullptr); return RelevantExpr || Visit(E); } bool VisitOMPArrayShapingExpr(OMPArrayShapingExpr *E) { Expr *Base = E->getBase(); // Record the component - we don't have any declaration associated. - Components.emplace_back(E, nullptr); + llvm::PointerIntPair Pr(E, /*IsNonContiguous=*/false); + Components.emplace_back(Pr, nullptr); return Visit(Base->IgnoreParenImpCasts()); } @@ -16582,7 +16601,8 @@ } if (!RelevantExpr) { // Record the component if haven't found base decl. - Components.emplace_back(UO, nullptr); + llvm::PointerIntPair Pr(UO, /*IsNonContiguous=*/false); + Components.emplace_back(Pr, nullptr); } return RelevantExpr || Visit(UO->getSubExpr()->IgnoreParenImpCasts()); } @@ -16598,7 +16618,8 @@ // know the other subtree is just an offset) Expr *LE = BO->getLHS()->IgnoreParenImpCasts(); Expr *RE = BO->getRHS()->IgnoreParenImpCasts(); - Components.emplace_back(BO, nullptr); + llvm::PointerIntPair Pr(BO, /*IsNonContiguous=*/false); + Components.emplace_back(Pr, nullptr); assert((LE->getType().getTypePtr() == BO->getType().getTypePtr() || RE->getType().getTypePtr() == BO->getType().getTypePtr()) && "Either LHS or RHS have base decl inside"); @@ -16609,7 +16630,8 @@ bool VisitCXXThisExpr(CXXThisExpr *CTE) { assert(!RelevantExpr && "RelevantExpr is expected to be nullptr"); RelevantExpr = CTE; - Components.emplace_back(CTE, nullptr); + llvm::PointerIntPair Pr(CTE, IsNonContiguous); + Components.emplace_back(Pr, nullptr); return true; } bool VisitStmt(Stmt *) { @@ -16620,10 +16642,10 @@ return RelevantExpr; } explicit MapBaseChecker( - Sema &SemaRef, OpenMPClauseKind CKind, + Sema &SemaRef, OpenMPClauseKind CKind, OpenMPDirectiveKind DKind, OMPClauseMappableExprCommon::MappableExprComponentList &Components, bool NoDiagnose, SourceLocation &ELoc, SourceRange &ERange) - : SemaRef(SemaRef), CKind(CKind), Components(Components), + : SemaRef(SemaRef), CKind(CKind), DKind(DKind), Components(Components), NoDiagnose(NoDiagnose), ELoc(ELoc), ERange(ERange) {} }; } // namespace @@ -16635,10 +16657,10 @@ static const Expr *checkMapClauseExpressionBase( Sema &SemaRef, Expr *E, OMPClauseMappableExprCommon::MappableExprComponentList &CurComponents, - OpenMPClauseKind CKind, bool NoDiagnose) { + OpenMPClauseKind CKind, OpenMPDirectiveKind DKind, bool NoDiagnose) { SourceLocation ELoc = E->getExprLoc(); SourceRange ERange = E->getSourceRange(); - MapBaseChecker Checker(SemaRef, CKind, CurComponents, NoDiagnose, ELoc, + MapBaseChecker Checker(SemaRef, CKind, DKind, CurComponents, NoDiagnose, ELoc, ERange); if (Checker.Visit(E->IgnoreParens())) return Checker.getFoundBase(); @@ -17118,7 +17140,8 @@ // Obtain the array or member expression bases if required. Also, fill the // components array with all the components identified in the process. const Expr *BE = checkMapClauseExpressionBase( - SemaRef, SimpleExpr, CurComponents, CKind, /*NoDiagnose=*/false); + SemaRef, SimpleExpr, CurComponents, CKind, DSAS->getCurrentDirective(), + /*NoDiagnose=*/false); if (!BE) continue; @@ -18366,8 +18389,9 @@ // only need a component. MVLI.VarBaseDeclarations.push_back(D); MVLI.VarComponents.resize(MVLI.VarComponents.size() + 1); + llvm::PointerIntPair SimpleRefExprPr(SimpleRefExpr, false); MVLI.VarComponents.back().push_back( - OMPClauseMappableExprCommon::MappableComponent(SimpleRefExpr, D)); + OMPClauseMappableExprCommon::MappableComponent(SimpleRefExprPr, D)); } if (MVLI.ProcessedVarList.empty()) @@ -18433,7 +18457,8 @@ // Store the components in the stack so that they can be used to check // against other clauses later on. - OMPClauseMappableExprCommon::MappableComponent MC(SimpleRefExpr, D); + llvm::PointerIntPair SimpleRefExprPr(SimpleRefExpr, false); + OMPClauseMappableExprCommon::MappableComponent MC(SimpleRefExprPr, D); DSAStack->addMappableExpressionComponents( D, MC, /*WhereFoundClauseKind=*/OMPC_is_device_ptr); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -12476,10 +12476,11 @@ SmallVector Components; Components.reserve(TotalComponents); for (unsigned i = 0; i < TotalComponents; ++i) { - Expr *AssociatedExpr = Record.readExpr(); + llvm::PointerIntPair AssociatedExprPr(Record.readExpr(), + false); auto *AssociatedDecl = Record.readDeclAs(); Components.push_back(OMPClauseMappableExprCommon::MappableComponent( - AssociatedExpr, AssociatedDecl)); + AssociatedExprPr, AssociatedDecl)); } C->setComponents(Components, ListSizes); } @@ -12593,10 +12594,11 @@ SmallVector Components; Components.reserve(TotalComponents); for (unsigned i = 0; i < TotalComponents; ++i) { - Expr *AssociatedExpr = Record.readSubExpr(); + llvm::PointerIntPair AssociatedExprPr(Record.readSubExpr(), + Record.readBool()); auto *AssociatedDecl = Record.readDeclAs(); Components.push_back(OMPClauseMappableExprCommon::MappableComponent( - AssociatedExpr, AssociatedDecl)); + AssociatedExprPr, AssociatedDecl)); } C->setComponents(Components, ListSizes); } @@ -12643,10 +12645,11 @@ SmallVector Components; Components.reserve(TotalComponents); for (unsigned i = 0; i < TotalComponents; ++i) { - Expr *AssociatedExpr = Record.readSubExpr(); + llvm::PointerIntPair AssociatedExprPr(Record.readSubExpr(), + Record.readBool()); auto *AssociatedDecl = Record.readDeclAs(); Components.push_back(OMPClauseMappableExprCommon::MappableComponent( - AssociatedExpr, AssociatedDecl)); + AssociatedExprPr, AssociatedDecl)); } C->setComponents(Components, ListSizes); } @@ -12693,10 +12696,11 @@ SmallVector Components; Components.reserve(TotalComponents); for (unsigned i = 0; i < TotalComponents; ++i) { - Expr *AssociatedExpr = Record.readSubExpr(); + llvm::PointerIntPair AssociatedExprPr(Record.readSubExpr(), + false); auto *AssociatedDecl = Record.readDeclAs(); Components.push_back(OMPClauseMappableExprCommon::MappableComponent( - AssociatedExpr, AssociatedDecl)); + AssociatedExprPr, AssociatedDecl)); } C->setComponents(Components, ListSizes); } @@ -12736,10 +12740,11 @@ SmallVector Components; Components.reserve(TotalComponents); for (unsigned i = 0; i < TotalComponents; ++i) { - Expr *AssociatedExpr = Record.readSubExpr(); + llvm::PointerIntPair AssociatedExprPr(Record.readSubExpr(), + false); auto *AssociatedDecl = Record.readDeclAs(); Components.push_back(OMPClauseMappableExprCommon::MappableComponent( - AssociatedExpr, AssociatedDecl)); + AssociatedExprPr, AssociatedDecl)); } C->setComponents(Components, ListSizes); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6572,6 +6572,7 @@ Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); + Record.push_back(M.getNonContiguous()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } @@ -6596,6 +6597,7 @@ Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); + Record.push_back(M.getNonContiguous()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } diff --git a/clang/test/OpenMP/target_update_ast_print.cpp b/clang/test/OpenMP/target_update_ast_print.cpp --- a/clang/test/OpenMP/target_update_ast_print.cpp +++ b/clang/test/OpenMP/target_update_ast_print.cpp @@ -5,6 +5,14 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + +// RUN: %clang_cc1 -DOMP5 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s --check-prefix=OMP5 +// RUN: %clang_cc1 -DOMP5 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s --check-prefix=OMP5 + +// RUN: %clang_cc1 -DOMP5 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s --check-prefix=OMP5 +// RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s --check-prefix=OMP5 // expected-no-diagnostics #ifndef HEADER @@ -20,23 +28,64 @@ #pragma omp target update to(([a][targ])p, a) if(l>5) device(l) nowait depend(inout:l) #pragma omp target update from(b, ([a][targ])p) if(l<5) device(l-1) nowait depend(inout:l) + +#ifdef OMP5 + U marr[10][10][10]; +#pragma omp target update to(marr[2][0:2][0:2]) + +#pragma omp target update from(marr[2][0:2][0:2]) + +#pragma omp target update to(marr[:][0:2][0:2]) + +#pragma omp target update from(marr[:][0:2][0:2]) + +#pragma omp target update to(marr[:][:l][l:]) + +#pragma omp target update from(marr[:][:l][l:]) + +#pragma omp target update to(marr[:2][:1][:]) + +#pragma omp target update from(marr[:2][:1][:]) + +#pragma omp target update to(marr[:2][:][:1]) + +#pragma omp target update from(marr[:2][:][:1]) + +#pragma omp target update to(marr[:2][:][1:]) + +#pragma omp target update from(marr[:2][:][1:]) + +#pragma omp target update to(marr[:1][3:2][:2]) + +#pragma omp target update from(marr[:1][3:2][:2]) + +#pragma omp target update to(marr[:1][:2][0]) + +#pragma omp target update from(marr[:1][:2][0]) + +// OMP5: marr[10][10][10]; +// OMP5-NEXT: #pragma omp target update to(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:][:l][l:]) +// OMP5-NEXT: #pragma omp target update from(marr[:][:l][l:]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:1][:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:1][:]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][:2][0]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][:2][0]) +#endif + return a + targ + (T)b; } // CHECK: static T a, *p; // CHECK-NEXT: U b; -// CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l){{$}} -// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) -// CHECK: static int a, *p; -// CHECK-NEXT: float b; -// CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l) -// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) -// CHECK: static char a, *p; -// CHECK-NEXT: float b; -// CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l) -// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) int main(int argc, char **argv) { static int a; @@ -50,6 +99,40 @@ // CHECK-NEXT: #pragma omp target update to(a) if(f > 0.) device(n) nowait depend(in : n) #pragma omp target update from(f) if(f<0.0) device(n+1) nowait depend(in:n) // CHECK-NEXT: #pragma omp target update from(f) if(f < 0.) device(n + 1) nowait depend(in : n) + +#ifdef OMP5 + float marr[10][10][10]; +// OMP5: marr[10][10][10]; +#pragma omp target update to(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[2][0:2][0:2]) +#pragma omp target update from(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[2][0:2][0:2]) +#pragma omp target update to(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:][0:2][0:2]) +#pragma omp target update from(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:][0:2][0:2]) +#pragma omp target update to(marr[:][:n][n:]) +// OMP5: #pragma omp target update to(marr[:][:n][n:]) +#pragma omp target update from(marr[:2][:1][:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:1][:]) +#pragma omp target update to(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][:1]) +#pragma omp target update from(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][:1]) +#pragma omp target update to(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][1:]) +#pragma omp target update from(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][1:]) +#pragma omp target update to(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][3:2][:2]) +#pragma omp target update from(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][3:2][:2]) +#pragma omp target update to(marr[:1][:2][0]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][:2][0]) +#pragma omp target update from(marr[:1][:2][0]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][:2][0]) +#endif + return foo(argc, f) + foo(argv[0][0], f) + a; } diff --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp --- a/clang/test/OpenMP/target_update_codegen.cpp +++ b/clang/test/OpenMP/target_update_codegen.cpp @@ -1059,5 +1059,283 @@ #pragma omp target update from(([sa][5])f) } +#endif + +///==========================================================================/// +// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-64 +// RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-64 +// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-32 +// RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-32 + +// RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// SIMD-ONLY19-NOT: {{__kmpc|__tgt}} +#ifdef CK19 + +// CK19: [[STRUCT_DESCRIPTOR:%.+]] = type { i64, i64, i64 } + +// CK19: [[MSIZE:@.+]] = {{.+}}constant [1 x i64] [i64 3] +// CK19: [[MTYPE:@.+]] = {{.+}}constant [1 x i64] [i64 2081] + +// CK19-LABEL: _Z3foo +void foo(int arg) { + int arr[3][4][5]; + + // CK19: [[DIMS:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]], + // CK19: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [5 x i32]]], [3 x [4 x [5 x i32]]]* [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK19: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [5 x i32]], [4 x [5 x i32]]* [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 + // CK19: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x i32], [5 x i32]* [[ARRAY_DECAY]], {{.+}} + // CK19: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x i32], [5 x i32]* [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0 + // CK19: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds {{.+}}, {{.+}}* [[ARRAY_DECAY_2]], {{.+}} 1 + // CK19: [[LEN:%.+]] = sub nuw i64 4, [[ARG_ADDR:%.+]] + // CK19: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i{{.+}} 0, i{{.+}} 0 + // CK19: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i{{.+}} 0, i{{.+}} 0 + // CK19: [[DIM_1:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 0 + // CK19: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 0 + // CK19: store i64 0, i64* [[OFFSET]], + // CK19: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 1 + // CK19: store i64 2, i64* [[COUNT]], + // CK19: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 2 + // CK19: store i64 80, i64* [[STRIDE]], + // CK19: [[DIM_2:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 1 + // CK19: [[OFFSET_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 0 + // CK19: store i64 [[ARG:%.+]], i64* [[OFFSET_2]], + // CK19: [[COUNT_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 1 + // CK19: store i64 [[LEN]], i64* [[COUNT_2]], + // CK19: [[STRIDE_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 2 + // CK19: store i64 20, i64* [[STRIDE_2]], + // CK19: [[DIM_3:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 2 + // CK19: [[OFFSET_3:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 0 + // CK19: store i64 1, i64* [[OFFSET_3]], + // CK19: [[COUNT_3:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 1 + // CK19: store i64 4, i64* [[COUNT_3]], + // CK19: [[STRIDE_3:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 2 + // CK19: store i64 4, i64* [[STRIDE_3]], + + // CK19-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MSIZE]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE]]{{.+}}) + // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK19-DAG: [[PC0:%.+]] = bitcast [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]] to i8* + // CK19-DAG: [[PTRS:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i32 0, i32 0 + // CK19-DAG: store i8* [[PC0]], i8** [[PTRS]], + +#pragma omp target update to(arr[0:2][arg:][1:4]) + {++arg;} +} + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK20 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-64 +// RUN: %clang_cc1 -DCK20 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-64 +// RUN: %clang_cc1 -DCK20 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-32 +// RUN: %clang_cc1 -DCK20 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-32 + +// RUN: %clang_cc1 -DCK20 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK20 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK20 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK20 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// SIMD-ONLY19-NOT: {{__kmpc|__tgt}} +#ifdef CK20 + +struct ST { + int a; + double *b; +}; + +// CK20: [[STRUCT_ST:%.+]] = type { i32, double* } +// CK20: [[STRUCT_DESCRIPTOR:%.+]] = type { i64, i64, i64 } + +// CK20: [[MSIZE:@.+]] = {{.+}}constant [1 x i64] [i64 2] +// CK20: [[MTYPE:@.+]] = {{.+}}constant [1 x i64] [i64 2081] + +// CK20-LABEL: _Z3foo +void foo(int arg) { + ST arr[3][4]; + // CK20: [[DIMS:%.+]] = alloca [2 x [[STRUCT_DESCRIPTOR]]], + // CK20: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [[STRUCT_ST]]]], [3 x [4 x [[STRUCT_ST]]]]* [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK20: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [[STRUCT_ST]]], [4 x [[STRUCT_ST]]]* [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 + // CK20: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [[STRUCT_ST]], [[STRUCT_ST]]* [[ARRAY_DECAY]], {{.+}} + // CK20: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], {{.+}} 0, {{.+}} 0 + // CK20: [[BPC:%.+]] = bitcast i8** [[BP0]] to [3 x [4 x [[STRUCT_ST]]]]** + // CK20: store [3 x [4 x [[STRUCT_ST]]]]* [[ARR]], [3 x [4 x [[STRUCT_ST]]]]** [[BPC]], + // CK20: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], {{.+}} 0, {{.+}} 0 + // CK20: [[PC:%.+]] = bitcast i8** [[P0]] to [[STRUCT_ST]]** + // CK20: store [[STRUCT_ST]]* [[ARRAY_IDX_1]], [[STRUCT_ST]]** [[PC]], + // CK20: [[DIM_1:%.+]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR]]], [2 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 0 + // CK20: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 0 + // CK20: store i64 0, i64* [[OFFSET]], + // CK20: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 1 + // CK20: store i64 2, i64* [[COUNT]], + // CK20: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 2 + // CK20: store i64 {{32|64}}, i64* [[STRIDE]], + // CK20: [[DIM_2:%.+]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR]]], [2 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 1 + // CK20: [[OFFSET_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 0 + // CK20: store i64 1, i64* [[OFFSET_2]], + // CK20: [[COUNT_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 1 + // CK20: store i64 4, i64* [[COUNT_2]], + // CK20: [[STRIDE_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 2 + // CK20: store i64 {{8|16}}, i64* [[STRIDE_2]], + // CK20-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MSIZE]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE]]{{.+}}) + // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK20-DAG: [[PC0:%.+]] = bitcast [2 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]] to i8* + // CK20-DAG: [[PTRS:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i32 0, i32 0 + // CK20-DAG: store i8* [[PC0]], i8** [[PTRS]], + +#pragma omp target update to(arr[0:2][1:4]) + {++arg;} +} + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK21 --check-prefix CK21-64 +// RUN: %clang_cc1 -DCK21 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK21 --check-prefix CK21-64 +// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK21 --check-prefix CK21-32 +// RUN: %clang_cc1 -DCK21 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK21 --check-prefix CK21-32 + +// RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// SIMD-ONLY19-NOT: {{__kmpc|__tgt}} +#ifdef CK21 + +// CK21: [[STRUCT_ST:%.+]] = type { [10 x [10 x [10 x double*]]] } +// CK21: [[STRUCT_DESCRIPTOR:%.+]] = type { i64, i64, i64 } + +// CK21: [[MTYPE:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 281474976712705] + +struct ST { + double *dptr[10][10][10]; + +// CK21: _ZN2ST3fooEv + void foo() { + // CK21: [[DIMS:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]], + // CK21: [[ARRAY_IDX:%.+]] = getelementptr inbounds [10 x [10 x [10 x double*]]], [10 x [10 x [10 x double*]]]* [[DPTR:%.+]], {{.+}} 0, {{.+}} 0 + // CK21: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [10 x [10 x double*]], [10 x [10 x double*]]* [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 + // CK21: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [10 x double*], [10 x double*]* [[ARRAY_DECAY]], {{.+}} 1 + // CK21: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [10 x double*], [10 x double*]* [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0 + // CK21: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds {{.+}}, {{.+}}* [[ARRAY_DECAY_2]], {{.+}} 0 + // CK21: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], {{.+}} 0, {{.+}} 0 + // CK21: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i{{.+}} 0, i{{.+}} 0 + // CK21: [[DIM_1:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 0 + // CK21: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 0 + // CK21: store i64 0, i64* [[OFFSET]], + // CK21: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 1 + // CK21: store i64 2, i64* [[COUNT]], + // CK21: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 2 + // CK21: store i64 {{400|800}}, i64* [[STRIDE]], + // CK21: [[DIM_2:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 1 + // CK21: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 0 + // CK21: store i64 1, i64* [[OFFSET]], + // CK21: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 1 + // CK21: store i64 3, i64* [[COUNT]], + // CK21: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 2 + // CK21: store i64 {{40|80}}, i64* [[STRIDE]], + // CK21: [[DIM_3:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 2 + // CK21: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 0 + // CK21: store i64 0, i64* [[OFFSET]], + // CK21: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 1 + // CK21: store i64 4, i64* [[COUNT]], + // CK21: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 2 + // CK21: store i64 {{4|8}}, i64* [[STRIDE]], + // CK21-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}}* [[GEPSZ:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE]]{{.+}}) + // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK21-DAG: [[PC0:%.+]] = bitcast [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]] to i8* + // CK21-DAG: [[PTRS:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i32 0, i32 0 + // CK21-DAG: store i8* [[PC0]], i8** [[PTRS]], +#pragma omp target update to(dptr[0:2][1:3][0:4]) + } +}; + +void bar() { + ST st; + st.foo(); +} + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK22 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK22 --check-prefix CK22-64 +// RUN: %clang_cc1 -DCK22 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK22 --check-prefix CK22-64 +// RUN: %clang_cc1 -DCK22 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK22 --check-prefix CK22-32 +// RUN: %clang_cc1 -DCK22 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK22 --check-prefix CK22-32 + +// RUN: %clang_cc1 -DCK22 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK22 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK22 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK22 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// SIMD-ONLY19-NOT: {{__kmpc|__tgt}} +#ifdef CK22 + +// CK22: [[STRUCT_DESCRIPTOR:%.+]] = type { i64, i64, i64 } + +// CK22: [[MSIZE:@.+]] = {{.+}}constant [1 x i64] [i64 3] +// CK22: [[MTYPE:@.+]] = {{.+}}constant [1 x i64] [i64 2081] + +struct ST { +// CK22: _ZN2ST3fooEPA10_Pi + void foo(int *arr[5][10]) { + // CK22: [[DIMS:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]], + // CK22: [[ARRAY_IDX:%.+]] = getelementptr inbounds [10 x i32*], [10 x i32*]* [[ARR:%.+]], {{.+}} 0 + // CK22: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [10 x i32*], [10 x i32*]* [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 + // CK22: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds i32*, i32** [[ARRAY_DECAY:%.+]], {{.+}} 1 + // CK22: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], {{.+}} 0, {{.+}} 0 + // CK22: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i{{.+}} 0, i{{.+}} 0 + // CK22: [[DIM_1:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 0 + // CK22: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 0 + // CK22: store i64 0, i64* [[OFFSET]], + // CK22: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 1 + // CK22: store i64 2, i64* [[COUNT]], + // CK22: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 2 + // CK22: store i64 {{200|400}}, i64* [[STRIDE]], + // CK22: [[DIM_2:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 1 + // CK22: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 0 + // CK22: store i64 1, i64* [[OFFSET]], + // CK22: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 1 + // CK22: store i64 3, i64* [[COUNT]], + // CK22: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 2 + // CK22: store i64 {{40|80}}, i64* [[STRIDE]], + // CK22: [[DIM_3:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 2 + // CK22: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 0 + // CK22: store i64 0, i64* [[OFFSET]], + // CK22: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 1 + // CK22: store i64 4, i64* [[COUNT]], + // CK22: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 2 + // CK22: store i64 {{4|8}}, i64* [[STRIDE]], + // CK22-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MSIZE]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE]]{{.+}}) + // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK22-DAG: [[PC0:%.+]] = bitcast [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]] to i8* + // CK22-DAG: [[PTRS:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i32 0, i32 0 + // CK22-DAG: store i8* [[PC0]], i8** [[PTRS]], +#pragma omp target update to(arr[0:2][1:3][0:4]) + } +}; + +void bar() { + ST st; + int *arr[5][10]; + st.foo(arr); +} + #endif #endif diff --git a/clang/test/OpenMP/target_update_messages.cpp b/clang/test/OpenMP/target_update_messages.cpp --- a/clang/test/OpenMP/target_update_messages.cpp +++ b/clang/test/OpenMP/target_update_messages.cpp @@ -1,6 +1,8 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le45 -fopenmp -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le50 -fopenmp -fopenmp-version=50 -ferror-limit 100 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le45 -fopenmp-simd -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 100 %s -Wuninitialized void xxx(int argc) { int x; // expected-note {{initialize the variable 'x' to silence this warning}} @@ -36,5 +38,21 @@ { foo(); } + + double marr[10][5][10]; +#pragma omp target update to(marr[0:][2:4][1:2]) // le45-error {{array section does not specify contiguous storage}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} + {} +#pragma omp target update from(marr[0:][2:4][1:2]) // le45-error {{array section does not specify contiguous storage}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} + + int arr[4][3][2][1]; +#pragma omp target update to(arr[0:2][2:4][:2][1]) // le45-error {{array section does not specify contiguous storage}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} + {} +#pragma omp target update from(arr[0:2][2:4][:2][1]) // le45-error {{array section does not specify contiguous storage}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} + + double ***dptr; +#pragma omp target update to(dptr[0:2][2:4][1:2]) // le45-error {{array section does not specify contiguous storage}} le50-error 2 {{section length is unspecified and cannot be inferred because subscripted value is an array of unknown bound}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} + {} +#pragma omp target update from(dptr[0:2][2:4][1:2]) // le45-error {{array section does not specify contiguous storage}} le50-error 2 {{section length is unspecified and cannot be inferred because subscripted value is an array of unknown bound}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} + return tmain(argc, argv); } diff --git a/clang/test/OpenMP/target_update_to_messages.cpp b/clang/test/OpenMP/target_update_to_messages.cpp --- a/clang/test/OpenMP/target_update_to_messages.cpp +++ b/clang/test/OpenMP/target_update_to_messages.cpp @@ -79,6 +79,10 @@ #pragma omp target update to(*(*(this->ptr)+a+this->ptr)) // le45-error {{expected expression containing only member accesses and/or array sections based on named variables}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} #pragma omp target update to(*(this+this)) // expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} expected-error {{invalid operands to binary expression ('S8 *' and 'S8 *')}} {} + + double marr[10][5][10]; +#pragma omp target update to(marr[0:][2:4][1:2]) // le45-error {{array section does not specify contiguous storage}} le45-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} + {} } }; @@ -140,6 +144,7 @@ { #pragma omp target update to(s7.x) } + return 0; }