diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -4886,10 +4886,35 @@ std::copy(Components.begin(), Components.end(), getComponentsRef().begin()); } + /// Get the non-contiguous attribute per declaration that are in the trailing + /// objects of the class. + MutableArrayRef getNonContiguousListsRef() { + return MutableArrayRef( + static_cast(this)->template getTrailingObjects(), + NumComponentLists); + } + + /// Get the non-contiguous attribute per declaration that are in the trailing + /// objects of the class. + ArrayRef getNonContiguousListsRef() const { + return ArrayRef( + static_cast(this)->template getTrailingObjects(), + NumComponentLists); + } + + /// Set the non-contiguous attribute per declaration that are in the trailing + /// objects of the class. + void setNonContiguousLists(ArrayRef NLs) { + assert(NLs.size() == NumComponentLists && + "Unexpected amount of list numbers."); + std::copy(NLs.begin(), NLs.end(), getNonContiguousListsRef().begin()); + } + /// Fill the clause information from the list of declarations and /// associated component lists. void setClauseInfo(ArrayRef Declarations, - MappableExprComponentListsRef ComponentLists) { + MappableExprComponentListsRef ComponentLists, + ArrayRef NonContiguousList) { // Perform some checks to make sure the data sizes are consistent with the // information available when the clause was created. assert(getUniqueDeclarationsTotalNumber(Declarations) == @@ -4901,6 +4926,8 @@ "Declaration and component lists size is not consistent!"); assert(Declarations.size() == NumComponentLists && "Unexpected declaration and component lists size!"); + assert(NonContiguousList.size() == ComponentLists.size() && + "Unexpected NonContiguousList size"); // Organize the components by declaration and retrieve the original // expression. Original expressions are always the first component of the @@ -4960,6 +4987,9 @@ CI = std::copy(C.begin(), C.end(), CI); } } + + std::copy(NonContiguousList.begin(), NonContiguousList.end(), + getNonContiguousListsRef().begin()); } /// Set the nested name specifier of associated user-defined mapper. @@ -5221,6 +5251,34 @@ return const_all_components_range(A.begin(), A.end()); } + using non_contiguous_list_iterator = MutableArrayRef::iterator; + using non_contiguous_list_const_iterator = ArrayRef::iterator; + using non_contiguous_list_range = + llvm::iterator_range; + using non_contiguous_list_const_range = + llvm::iterator_range; + + non_contiguous_list_iterator non_contiguous_list_begin() { + return getNonContiguousListsRef().begin(); + } + non_contiguous_list_iterator non_contiguous_list_end() { + return getNonContiguousListsRef().end(); + } + non_contiguous_list_const_iterator non_contiguous_list_begin() const { + return getNonContiguousListsRef().begin(); + } + non_contiguous_list_const_iterator non_contiguous_list_end() const { + return getNonContiguousListsRef().end(); + } + non_contiguous_list_range non_contiguous_lists() { + return non_contiguous_list_range(non_contiguous_list_begin(), + non_contiguous_list_end()); + } + non_contiguous_list_const_range non_contiguous_lists() const { + return non_contiguous_list_const_range(non_contiguous_list_begin(), + non_contiguous_list_end()); + } + using mapperlist_iterator = MutableArrayRef::iterator; using mapperlist_const_iterator = ArrayRef::iterator; using mapperlist_range = llvm::iterator_range; @@ -5251,10 +5309,11 @@ /// \endcode /// In this example directive '#pragma omp target' has clause 'map' /// with the variables 'a' and 'b'. -class OMPMapClause final : public OMPMappableExprListClause, - private llvm::TrailingObjects< - OMPMapClause, Expr *, ValueDecl *, unsigned, - OMPClauseMappableExprCommon::MappableComponent> { +class OMPMapClause final + : public OMPMappableExprListClause, + private llvm::TrailingObjects< + OMPMapClause, Expr *, ValueDecl *, bool, unsigned, + OMPClauseMappableExprCommon::MappableComponent> { friend class OMPClauseReader; friend OMPMappableExprListClause; friend OMPVarListClause; @@ -5270,6 +5329,9 @@ size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum(); } + size_t numTrailingObjects(OverloadToken) const { + return getTotalComponentListNum(); + } size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum() + getTotalComponentListNum(); } @@ -5403,7 +5465,7 @@ Create(const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef Vars, ArrayRef Declarations, MappableExprComponentListsRef ComponentLists, - ArrayRef UDMapperRefs, + ArrayRef NonContiguousList, ArrayRef UDMapperRefs, ArrayRef MapModifiers, ArrayRef MapModifiersLoc, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId, @@ -6206,7 +6268,7 @@ /// with the variables 'a' and 'b'. class OMPToClause final : public OMPMappableExprListClause, private llvm::TrailingObjects< - OMPToClause, Expr *, ValueDecl *, unsigned, + OMPToClause, Expr *, ValueDecl *, bool, unsigned, OMPClauseMappableExprCommon::MappableComponent> { friend class OMPClauseReader; friend OMPMappableExprListClause; @@ -6254,6 +6316,9 @@ size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum(); } + size_t numTrailingObjects(OverloadToken) const { + return getTotalComponentListNum(); + } size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum() + getTotalComponentListNum(); } @@ -6273,13 +6338,12 @@ /// \param UDMQualifierLoc C++ nested name specifier for the associated /// user-defined mapper. /// \param MapperId The identifier of associated user-defined mapper. - static OMPToClause *Create(const ASTContext &C, const OMPVarListLocTy &Locs, - ArrayRef Vars, - ArrayRef Declarations, - MappableExprComponentListsRef ComponentLists, - ArrayRef UDMapperRefs, - NestedNameSpecifierLoc UDMQualifierLoc, - DeclarationNameInfo MapperId); + static OMPToClause * + Create(const ASTContext &C, const OMPVarListLocTy &Locs, + ArrayRef Vars, ArrayRef Declarations, + MappableExprComponentListsRef ComponentLists, + ArrayRef IsNonContiguousList, ArrayRef UDMapperRefs, + NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId); /// Creates an empty clause with the place for \a NumVars variables. /// @@ -6325,7 +6389,7 @@ class OMPFromClause final : public OMPMappableExprListClause, private llvm::TrailingObjects< - OMPFromClause, Expr *, ValueDecl *, unsigned, + OMPFromClause, Expr *, ValueDecl *, bool, unsigned, OMPClauseMappableExprCommon::MappableComponent> { friend class OMPClauseReader; friend OMPMappableExprListClause; @@ -6373,6 +6437,9 @@ size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum(); } + size_t numTrailingObjects(OverloadToken) const { + return getTotalComponentListNum(); + } size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum() + getTotalComponentListNum(); } @@ -6392,13 +6459,12 @@ /// \param UDMQualifierLoc C++ nested name specifier for the associated /// user-defined mapper. /// \param MapperId The identifier of associated user-defined mapper. - static OMPFromClause *Create(const ASTContext &C, const OMPVarListLocTy &Locs, - ArrayRef Vars, - ArrayRef Declarations, - MappableExprComponentListsRef ComponentLists, - ArrayRef UDMapperRefs, - NestedNameSpecifierLoc UDMQualifierLoc, - DeclarationNameInfo MapperId); + static OMPFromClause * + Create(const ASTContext &C, const OMPVarListLocTy &Locs, + ArrayRef Vars, ArrayRef Declarations, + MappableExprComponentListsRef ComponentLists, + ArrayRef NonContiguousList, ArrayRef UDMapperRefs, + NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId); /// Creates an empty clause with the place for \a NumVars variables. /// @@ -6444,7 +6510,7 @@ class OMPUseDevicePtrClause final : public OMPMappableExprListClause, private llvm::TrailingObjects< - OMPUseDevicePtrClause, Expr *, ValueDecl *, unsigned, + OMPUseDevicePtrClause, Expr *, ValueDecl *, bool, unsigned, OMPClauseMappableExprCommon::MappableComponent> { friend class OMPClauseReader; friend OMPMappableExprListClause; @@ -6485,6 +6551,9 @@ size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum(); } + size_t numTrailingObjects(OverloadToken) const { + return getTotalComponentListNum(); + } size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum() + getTotalComponentListNum(); } @@ -6608,7 +6677,7 @@ class OMPIsDevicePtrClause final : public OMPMappableExprListClause, private llvm::TrailingObjects< - OMPIsDevicePtrClause, Expr *, ValueDecl *, unsigned, + OMPIsDevicePtrClause, Expr *, ValueDecl *, bool, unsigned, OMPClauseMappableExprCommon::MappableComponent> { friend class OMPClauseReader; friend OMPMappableExprListClause; @@ -6648,6 +6717,9 @@ size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum(); } + size_t numTrailingObjects(OverloadToken) const { + return getTotalComponentListNum(); + } size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum() + getTotalComponentListNum(); } diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -988,7 +988,8 @@ OMPMapClause *OMPMapClause::Create( const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef Vars, ArrayRef Declarations, - MappableExprComponentListsRef ComponentLists, ArrayRef UDMapperRefs, + MappableExprComponentListsRef ComponentLists, + ArrayRef NonContiguousList, ArrayRef UDMapperRefs, ArrayRef MapModifiers, ArrayRef MapModifiersLoc, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId, @@ -1004,15 +1005,17 @@ // user-defined mapper for each clause list entry. // NumUniqueDeclarations x ValueDecl* - unique base declarations associated // with each component list. + // NumComponentLists x bool - number of non-contiguous attribute. // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the // number of lists for each unique declaration and the size of each component // list. // NumComponents x MappableComponent - the total of all the components in all // the lists. void *Mem = C.Allocate( - totalSizeToAlloc( 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); OMPMapClause *Clause = new (Mem) @@ -1021,7 +1024,7 @@ Clause->setVarRefs(Vars); Clause->setUDMapperRefs(UDMapperRefs); - Clause->setClauseInfo(Declarations, ComponentLists); + Clause->setClauseInfo(Declarations, ComponentLists, NonContiguousList); Clause->setMapType(Type); Clause->setMapLoc(TypeLoc); return Clause; @@ -1031,9 +1034,9 @@ OMPMapClause::CreateEmpty(const ASTContext &C, const OMPMappableExprListSizeTy &Sizes) { void *Mem = C.Allocate( - totalSizeToAlloc( - 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, Sizes.NumVars, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); return new (Mem) OMPMapClause(Sizes); @@ -1042,7 +1045,8 @@ OMPToClause *OMPToClause::Create( const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef Vars, ArrayRef Declarations, - MappableExprComponentListsRef ComponentLists, ArrayRef UDMapperRefs, + MappableExprComponentListsRef ComponentLists, + ArrayRef NonContiguousList, ArrayRef UDMapperRefs, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId) { OMPMappableExprListSizeTy Sizes; Sizes.NumVars = Vars.size(); @@ -1055,15 +1059,17 @@ // user-defined mapper for each clause list entry. // NumUniqueDeclarations x ValueDecl* - unique base declarations associated // with each component list. + // NumComponentLists x bool - number of non-contiguous attribute. // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the // number of lists for each unique declaration and the size of each component // list. // NumComponents x MappableComponent - the total of all the components in all // the lists. void *Mem = C.Allocate( - totalSizeToAlloc( 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); @@ -1071,16 +1077,18 @@ Clause->setVarRefs(Vars); Clause->setUDMapperRefs(UDMapperRefs); - Clause->setClauseInfo(Declarations, ComponentLists); + Clause->setClauseInfo(Declarations, ComponentLists, NonContiguousList); + return Clause; } OMPToClause *OMPToClause::CreateEmpty(const ASTContext &C, const OMPMappableExprListSizeTy &Sizes) { void *Mem = C.Allocate( - totalSizeToAlloc( 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); return new (Mem) OMPToClause(Sizes); @@ -1089,7 +1097,8 @@ OMPFromClause *OMPFromClause::Create( const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef Vars, ArrayRef Declarations, - MappableExprComponentListsRef ComponentLists, ArrayRef UDMapperRefs, + MappableExprComponentListsRef ComponentLists, + ArrayRef NonContiguousList, ArrayRef UDMapperRefs, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId) { OMPMappableExprListSizeTy Sizes; Sizes.NumVars = Vars.size(); @@ -1102,15 +1111,17 @@ // user-defined mapper for each clause list entry. // NumUniqueDeclarations x ValueDecl* - unique base declarations associated // with each component list. + // NumComponentLists x bool - number of non-contiguous attribute. // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the // number of lists for each unique declaration and the size of each component // list. // NumComponents x MappableComponent - the total of all the components in all // the lists. void *Mem = C.Allocate( - totalSizeToAlloc( 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); @@ -1119,7 +1130,8 @@ Clause->setVarRefs(Vars); Clause->setUDMapperRefs(UDMapperRefs); - Clause->setClauseInfo(Declarations, ComponentLists); + Clause->setClauseInfo(Declarations, ComponentLists, NonContiguousList); + return Clause; } @@ -1127,9 +1139,10 @@ OMPFromClause::CreateEmpty(const ASTContext &C, const OMPMappableExprListSizeTy &Sizes) { void *Mem = C.Allocate( - totalSizeToAlloc( 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); return new (Mem) OMPFromClause(Sizes); @@ -1163,15 +1176,17 @@ // list entry and an equal number of private copies and inits. // NumUniqueDeclarations x ValueDecl* - unique base declarations associated // with each component list. + // NumComponentLists x bool - number of non-contiguous attribute. // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the // number of lists for each unique declaration and the size of each component // list. // NumComponents x MappableComponent - the total of all the components in all // the lists. void *Mem = C.Allocate( - totalSizeToAlloc( 3 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); @@ -1180,7 +1195,8 @@ Clause->setVarRefs(Vars); Clause->setPrivateCopies(PrivateVars); Clause->setInits(Inits); - Clause->setClauseInfo(Declarations, ComponentLists); + SmallVector NonContiguousList(Declarations.size(), false); + Clause->setClauseInfo(Declarations, ComponentLists, NonContiguousList); return Clause; } @@ -1188,9 +1204,10 @@ OMPUseDevicePtrClause::CreateEmpty(const ASTContext &C, const OMPMappableExprListSizeTy &Sizes) { void *Mem = C.Allocate( - totalSizeToAlloc( 3 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); return new (Mem) OMPUseDevicePtrClause(Sizes); @@ -1212,22 +1229,24 @@ // entry. // NumUniqueDeclarations x ValueDecl* - unique base declarations associated // with each component list. + // NumComponentLists x bool - number of non-contiguous attribute. // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the // number of lists for each unique declaration and the size of each component // list. // NumComponents x MappableComponent - the total of all the components in all // the lists. void *Mem = C.Allocate( - totalSizeToAlloc( - Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumVars, Sizes.NumUniqueDeclarations, Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); OMPIsDevicePtrClause *Clause = new (Mem) OMPIsDevicePtrClause(Locs, Sizes); Clause->setVarRefs(Vars); - Clause->setClauseInfo(Declarations, ComponentLists); + SmallVector NonContiguousList(Declarations.size(), false); + Clause->setClauseInfo(Declarations, ComponentLists, NonContiguousList); return Clause; } @@ -1235,9 +1254,9 @@ OMPIsDevicePtrClause::CreateEmpty(const ASTContext &C, const OMPMappableExprListSizeTy &Sizes) { void *Mem = C.Allocate( - totalSizeToAlloc( - Sizes.NumVars, Sizes.NumUniqueDeclarations, + Sizes.NumVars, Sizes.NumUniqueDeclarations, Sizes.NumComponentLists, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); return new (Mem) OMPIsDevicePtrClause(Sizes); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7820,6 +7820,10 @@ /// Close is a hint to the runtime to allocate memory close to /// the target device. OMP_MAP_CLOSE = 0x400, + /// Signal that the runtime library should use args as an array of + /// descriptor_dim pointers and use args_size as dims. Used when we have + /// non-contiguous list items in target update directive + OMP_MAP_DESCRIPTOR = 0x800, /// The 16 MSBs of the flags indicate whether the entry is member of some /// struct/class. OMP_MAP_MEMBER_OF = 0xffff000000000000, @@ -7855,6 +7859,8 @@ using MapBaseValuesArrayTy = SmallVector; using MapValuesArrayTy = SmallVector; using MapFlagsArrayTy = SmallVector; + using MapDimArrayTy = SmallVector; + using MapNonContiguousArrayTy = SmallVector; /// Map between a struct and the its lowest & highest elements which have been /// mapped. @@ -7876,15 +7882,17 @@ ArrayRef MapModifiers; bool ReturnDevicePointer = false; bool IsImplicit = false; + bool IsNonContiguous = false; MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - bool ReturnDevicePointer, bool IsImplicit) + ArrayRef MapModifiers, bool ReturnDevicePointer, + bool IsImplicit, bool IsNonContiguous) : Components(Components), MapType(MapType), MapModifiers(MapModifiers), - ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), + IsNonContiguous(IsNonContiguous) {} }; /// If use_device_ptr is used on a pointer which is a struct member and there @@ -7998,9 +8006,11 @@ /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - OpenMPOffloadMappingFlags getMapTypeBits( - OpenMPMapClauseKind MapType, ArrayRef MapModifiers, - bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { + OpenMPOffloadMappingFlags + getMapTypeBits(OpenMPMapClauseKind MapType, + ArrayRef MapModifiers, bool IsImplicit, + bool AddPtrFlag, bool AddIsTargetParamFlag, + bool IsNonContiguous) const { OpenMPOffloadMappingFlags Bits = IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; switch (MapType) { @@ -8036,6 +8046,8 @@ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) != MapModifiers.end()) Bits |= OMP_MAP_CLOSE; + if (IsNonContiguous) + Bits |= OMP_MAP_DESCRIPTOR; return Bits; } @@ -8083,15 +8095,15 @@ /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. void generateInfoForComponentList( - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, + OpenMPMapClauseKind MapType, ArrayRef MapModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, MapDimArrayTy &Dims, StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, bool IsImplicit, ArrayRef - OverlappedElements = llvm::None) const { + OverlappedElements = llvm::None, + bool IsNonContiguous = false) const { // The following summarizes what has to be generated for each map and the // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags @@ -8338,6 +8350,9 @@ // whether we are dealing with a member of a declared struct. const MemberExpr *EncounteredME = nullptr; + // Track for the total number of dimension. + uint64_t DimSize = 0; + for (; I != CE; ++I) { // If the current component is member of a struct (parent struct) mark it. if (!EncounteredME) { @@ -8356,8 +8371,11 @@ // becomes the base address for the following components. // A final array section, is one whose length can't be proved to be one. + // If the map item is non-contiguous then we don't treat any array section + // as final array section. bool IsFinalArraySection = - isFinalArraySectionExpression(I->getAssociatedExpression()); + isFinalArraySectionExpression(I->getAssociatedExpression()) && + (!IsNonContiguous); // Get information on whether the element is a pointer. Have to do a // special treatment for array sections given that they are built-in @@ -8376,6 +8394,11 @@ I->getAssociatedExpression()->getType()->isAnyPointerType(); bool IsNonDerefPointer = IsPointer && !UO && !BO; + if (OASE || OAShE || + dyn_cast(I->getAssociatedExpression())) { + DimSize++; + } + if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { // If this is not the last component, we expect the pointer to be // associated with an array expression or member expression. @@ -8430,7 +8453,8 @@ OMP_MAP_MEMBER_OF | getMapTypeBits(MapType, MapModifiers, IsImplicit, /*AddPtrFlag=*/false, - /*AddIsTargetParamFlag=*/false); + /*AddIsTargetParamFlag=*/false, + /*IsNonContiguous=*/IsNonContiguous); LB = BP; llvm::Value *Size = nullptr; // Do bitcopy of all non-overlapped structure elements. @@ -8454,6 +8478,7 @@ Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); Types.push_back(Flags); + Dims.push_back(IsNonContiguous ? DimSize : 0); LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } BasePointers.push_back(BP.getPointer()); @@ -8465,6 +8490,7 @@ Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); Types.push_back(Flags); + Dims.push_back(IsNonContiguous ? DimSize : 0); break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); @@ -8473,15 +8499,17 @@ Pointers.push_back(LB.getPointer()); Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); + Dims.push_back(IsNonContiguous ? DimSize : 0); // We need to add a pointer flag for each map that comes from the // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). - OpenMPOffloadMappingFlags Flags = getMapTypeBits( - MapType, MapModifiers, IsImplicit, - !IsExpressionFirstInfo || RequiresReference, - IsCaptureFirstInfo && !RequiresReference); + OpenMPOffloadMappingFlags Flags = + getMapTypeBits(MapType, MapModifiers, IsImplicit, + !IsExpressionFirstInfo || RequiresReference, + IsCaptureFirstInfo && !RequiresReference, + /*IsNonContiguous=*/IsNonContiguous); if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, @@ -8536,6 +8564,154 @@ } } + /// Generate the base pointers, section pointers, sizes , map type bits, + /// dimension size, offset, count, and strides for the provided map type, map + /// modifier, and expression components. \a IsFirstComponent should be set to + /// true if the provided set of components is the first associated with a + /// capture. + void generateInfoForTargetDataComponentList( + OpenMPMapClauseKind MapType, ArrayRef MapModifiers, + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, MapDimArrayTy &Dims, + MapNonContiguousArrayTy &Offsets, MapNonContiguousArrayTy &Counts, + MapNonContiguousArrayTy &Strides, StructRangeInfoTy &PartialStruct, + bool IsFirstComponentList, bool IsImplicit, + ArrayRef + OverlappedElements = llvm::None) const { + + generateInfoForComponentList(MapType, MapModifiers, Components, + BasePointers, Pointers, Sizes, Types, Dims, + PartialStruct, IsFirstComponentList, + IsImplicit, OverlappedElements, true); + + const ASTContext &Context = CGF.getContext(); + + MapValuesArrayTy CurOffsets; + MapValuesArrayTy CurCounts; + MapValuesArrayTy CurStrides; + llvm::Value *CurStride = nullptr; + + // Collect Size information for each dimension and get the element size as + // the first Stride. For example, for `int arr[10][10]`, the DimSizes + // should be [10, 10] and the first stride is 4 btyes. + SmallVector DimSizes; + for (const auto &Component : Components) { + const Expr *AssocExpr = Component.getAssociatedExpression(); + const auto *AE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); + if (AE || OASE) { + QualType Ty; + if (OASE) + Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + else + Ty = AE->getType(); + auto *CAT = Context.getAsConstantArrayType(Ty); + auto *VAT = Context.getAsVariableArrayType(Ty); + // Get element size if we CurStrides is empty. + if (CurStrides.empty()) { + const Type *ElementType = nullptr; + uint64_t ElementTypeSize; + if (CAT) { + ElementType = CAT->getElementType().getTypePtr(); + ElementTypeSize = + Context.getTypeSizeInChars(ElementType).getQuantity(); + } else { + assert(VAT && "Should be either ConstantArray or VariableArray"); + ElementType = VAT->getElementType().getTypePtr(); + ElementTypeSize = + Context.getTypeSizeInChars(ElementType).getQuantity(); + } + CurStrides.push_back( + llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); + } + // Get dimension value. + llvm::Value *SizeV = nullptr; + if (CAT) { + llvm::APInt Size = CAT->getSize(); + SizeV = llvm::ConstantInt::get(CGF.SizeTy, Size); + } else { + assert(VAT && "Should be either ConstantArray or VariableArray"); + const Expr *Size = VAT->getSizeExpr(); + SizeV = CGF.EmitScalarExpr(Size); + } + SizeV = CGF.Builder.CreateIntCast(SizeV, CGF.Int64Ty, + /*IsSigned=*/false); + DimSizes.push_back(SizeV); + } + } + + // Scan the components from the base to the complete expression. + auto CI = Components.begin(); + auto CE = Components.end(); + auto I = CI; + auto DI = DimSizes.begin(); + + // Collect info for non-contiguous. Notice that offset, count, and stride + // are only meaningful for array-section, so we insert a null for anything + // other than array-section. + // Also, the size of offset, count, and stride are not the same as pointers, + // base_pointers, sizes, or dims. Instead, the size of offset, count, and + // stride are the same as the number of non-contiguous declaration in target + // update to/from clause. + for (; I != CE; ++I) { + const Expr *AssocExpr = I->getAssociatedExpression(); + const auto *AE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); + + if (OASE || AE) { + // Offset + const Expr *OffsetExpr = nullptr; + if (OASE) + OffsetExpr = OASE->getLowerBound(); + else + OffsetExpr = AE->getIdx(); + llvm::Value *Offset = nullptr; + if (!OffsetExpr) { + // If offset is absent, then we just set it to zero. + Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); + } else { + Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), + CGF.Int64Ty, + /*isSigned=*/false); + } + CurOffsets.push_back(Offset); + // Count + const Expr *CountExpr = nullptr; + if (OASE) + CountExpr = OASE->getLength(); + llvm::Value *Count = nullptr; + if (!CountExpr) { + // If length is absent then we calculate it as (Total length - + // lower_bound) + Count = CGF.Builder.CreateNUWSub(*DI, Offset); + } else { + Count = CGF.EmitScalarExpr(CountExpr); + } + Count = + CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); + CurCounts.push_back(Count); + // Stride = previous stride * previous dimension size + // Take `int arr[5][10]` and `arr[0:2][0:2]` as an example: + // Dimension 1 Dimension 0 + // Offset 0 0 + // Count 2 2 + // Stride 40 bytes (4x10) 4 bytes (int) + if (DI != DimSizes.begin()) { + CurStride = + CGF.Builder.CreateNUWMul(CurStrides.back(), *std::prev(DI, 1)); + CurStrides.push_back(CurStride); + } + + DI++; + } + } + + Offsets.push_back(CurOffsets); + Counts.push_back(CurCounts); + Strides.push_back(CurStrides); + } + /// Return the adjusted map modifiers if the declaration a capture refers to /// appears in a first-private clause. This is expected to be used only with /// directives that start with 'target'. @@ -8714,7 +8890,10 @@ /// index where it occurs is appended to the device pointers info array. void generateAllInfo(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + MapFlagsArrayTy &Types, MapDimArrayTy &Dims, + MapNonContiguousArrayTy &Offsets, + MapNonContiguousArrayTy &Counts, + MapNonContiguousArrayTy &Strides) const { // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. @@ -8722,17 +8901,18 @@ // Helper function to fill the information map for the different supported // clauses. - auto &&InfoGen = [&Info]( - const ValueDecl *D, - OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - bool ReturnDevicePointer, bool IsImplicit) { - const ValueDecl *VD = - D ? cast(D->getCanonicalDecl()) : nullptr; - Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, - IsImplicit); - }; + auto &&InfoGen = + [&Info](const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef MapModifiers, + bool ReturnDevicePointer, bool IsImplicit, + bool IsNonContiguous) { + const ValueDecl *VD = + D ? cast(D->getCanonicalDecl()) : nullptr; + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, + IsImplicit, IsNonContiguous); + }; assert(CurDir.is() && "Expect a executable directive"); @@ -8740,18 +8920,27 @@ for (const auto *C : CurExecDir->getClausesOfKind()) for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), - /*ReturnDevicePointer=*/false, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit(), + /*IsNonContiguous=*/false); } - for (const auto *C : CurExecDir->getClausesOfKind()) - for (const auto L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit()); + for (const auto *C : CurExecDir->getClausesOfKind()) { + auto CI = C->component_lists_begin(); + auto CE = C->component_lists_end(); + auto NI = C->non_contiguous_list_begin(); + for (; CI != CE; ++CI, ++NI) { + InfoGen((*CI).first, (*CI).second, OMPC_MAP_to, llvm::None, + /*ReturnDevicePointer=*/false, C->isImplicit(), *NI); } - for (const auto *C : CurExecDir->getClausesOfKind()) - for (const auto L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit()); + } + for (const auto *C : CurExecDir->getClausesOfKind()) { + auto CI = C->component_lists_begin(); + auto CE = C->component_lists_end(); + auto NI = C->non_contiguous_list_begin(); + for (; CI != CE; ++CI, ++NI) { + InfoGen((*CI).first, (*CI).second, OMPC_MAP_from, llvm::None, + /*ReturnDevicePointer=*/false, C->isImplicit(), *NI); } + } // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the @@ -8801,7 +8990,8 @@ // the pointer into account for the calculation of the range of the // partial struct. InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit(), + /*IsNonContiguous=*/false); DeferredInfo[nullptr].emplace_back(IE, VD); } else { llvm::Value *Ptr = @@ -8824,6 +9014,10 @@ MapValuesArrayTy CurPointers; MapValuesArrayTy CurSizes; MapFlagsArrayTy CurTypes; + MapDimArrayTy CurDims; + MapNonContiguousArrayTy CurOffsets; + MapNonContiguousArrayTy CurCounts; + MapNonContiguousArrayTy CurStrides; StructRangeInfoTy PartialStruct; for (const MapInfo &L : M.second) { @@ -8832,10 +9026,18 @@ // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, - CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + if (L.IsNonContiguous) { + generateInfoForTargetDataComponentList( + L.MapType, L.MapModifiers, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, CurDims, CurOffsets, CurCounts, + CurStrides, PartialStruct, IsFirstComponentList, L.IsImplicit); + } else { + // Indicate that we do not do the special non-contiguous codegen + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, CurDims, PartialStruct, + IsFirstComponentList, L.IsImplicit); + } // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8875,15 +9077,24 @@ // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) + if (PartialStruct.Base.isValid()) { + // Make sure Dims have the same size as BP, P, Sizes, and Types. + // Put 0 here to make sure that `emitTargetDataOffloadingArrays` use it + // to skip this one. + CurDims.push_back(0); emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, PartialStruct); + } // We need to append the results of this capture to what we already have. BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); Pointers.append(CurPointers.begin(), CurPointers.end()); Sizes.append(CurSizes.begin(), CurSizes.end()); Types.append(CurTypes.begin(), CurTypes.end()); + Dims.append(CurDims.begin(), CurDims.end()); + Offsets.append(CurOffsets.begin(), CurOffsets.end()); + Counts.append(CurCounts.begin(), CurCounts.end()); + Strides.append(CurStrides.begin(), CurStrides.end()); } } @@ -8912,7 +9123,7 @@ const ValueDecl *VD = D ? cast(D->getCanonicalDecl()) : nullptr; Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, - IsImplicit); + IsImplicit, /*IsNonContiguous=*/false); }; for (const auto *C : CurMapperDir->clauselists()) { @@ -8933,6 +9144,7 @@ MapValuesArrayTy CurPointers; MapValuesArrayTy CurSizes; MapFlagsArrayTy CurTypes; + MapDimArrayTy CurDims; StructRangeInfoTy PartialStruct; for (const MapInfo &L : M.second) { @@ -8940,7 +9152,7 @@ "Not expecting declaration with no component lists."); generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, + CurTypes, CurDims, PartialStruct, IsFirstComponentList, L.IsImplicit); IsFirstComponentList = false; } @@ -9059,6 +9271,7 @@ MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + MapDimArrayTy &Dims, StructRangeInfoTy &PartialStruct) const { assert(!Cap->capturesVariableArrayType() && "Not expecting to generate map info for a variable array type!"); @@ -9208,7 +9421,7 @@ OverlappedComponents = Pair.getSecond(); bool IsFirstComponentList = true; generateInfoForComponentList(MapType, MapModifiers, Components, - BasePointers, Pointers, Sizes, Types, + BasePointers, Pointers, Sizes, Types, Dims, PartialStruct, IsFirstComponentList, IsImplicit, OverlappedComponents); } @@ -9222,10 +9435,9 @@ std::tie(Components, MapType, MapModifiers, IsImplicit) = L; auto It = OverlappedData.find(&L); if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, Components, - BasePointers, Pointers, Sizes, Types, - PartialStruct, IsFirstComponentList, - IsImplicit); + generateInfoForComponentList( + MapType, MapModifiers, Components, BasePointers, Pointers, Sizes, + Types, Dims, PartialStruct, IsFirstComponentList, IsImplicit); IsFirstComponentList = false; } } @@ -9235,7 +9447,8 @@ void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + MapFlagsArrayTy &Types, + MapDimArrayTy &Dims) const { assert(CurDir.is() && "Expect a executable directive"); const auto *CurExecDir = CurDir.get(); @@ -9256,7 +9469,7 @@ StructRangeInfoTy PartialStruct; generateInfoForComponentList( C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, - Pointers, Sizes, Types, PartialStruct, + Pointers, Sizes, Types, Dims, PartialStruct, /*IsFirstComponentList=*/true, C->isImplicit()); assert(!PartialStruct.Base.isValid() && "No partial structs for declare target link expected."); @@ -9350,16 +9563,15 @@ }; } // anonymous namespace -/// Emit the arrays used to pass the captures and map information to the -/// offloading runtime library. If there is no map or capture information, -/// return nullptr by reference. static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, - CGOpenMPRuntime::TargetDataInfo &Info) { + MappableExprsHandler::MapDimArrayTy &Dims, + CGOpenMPRuntime::TargetDataInfo &Info, + bool IsNonContiguous = false) { CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGF.getContext(); @@ -9402,8 +9614,14 @@ // We expect all the sizes to be constant, so we collect them to create // a constant array. SmallVector ConstSizes; - for (llvm::Value *S : Sizes) - ConstSizes.push_back(cast(S)); + for (unsigned I = 0, E = Sizes.size(); I < E; ++I) { + if (IsNonContiguous && + (MapTypes[I] & MappableExprsHandler::OMP_MAP_DESCRIPTOR)) { + ConstSizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, Dims[I])); + } else { + ConstSizes.push_back(cast(Sizes[I])); + } + } auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); @@ -9469,6 +9687,89 @@ } } +/// Emit the arrays used to pass the captures and map information to the +/// offloading runtime library. If there is no map or capture information, +/// return nullptr by reference. +static void emitTargetDataOffloadingArrays( + CodeGenFunction &CGF, + MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, + MappableExprsHandler::MapValuesArrayTy &Pointers, + MappableExprsHandler::MapValuesArrayTy &Sizes, + MappableExprsHandler::MapFlagsArrayTy &MapTypes, + MappableExprsHandler::MapDimArrayTy &Dims, + MappableExprsHandler::MapNonContiguousArrayTy &Offsets, + MappableExprsHandler::MapNonContiguousArrayTy &Counts, + MappableExprsHandler::MapNonContiguousArrayTy &Strides, + CGOpenMPRuntime::TargetDataInfo &Info) { + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Dims, Info, + true); + + if (Offsets.empty()) + return; + + ASTContext &C = CGF.getContext(); + CodeGenModule &CGM = CGF.CGM; + + // Build an array of struct descriptor_dim and then assign it to offload_args. + if (Info.NumberOfPtrs) { + // Build struct descriptor_dim { + // int64_t offset; + // int64_t count; + // int64_t stride + // }; + QualType Int64Ty = + C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + RecordDecl *RD; + RD = C.buildImplicitRecord("descriptor_dim"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + RD->completeDefinition(); + QualType DimTy = C.getRecordType(RD); + + enum { OffsetFD = 0, CountFD, StrideFD }; + // The reason we need two index variable here is because the size of "Dims" + // is the same as the size of Components, however, the size of offset, count + // , and stride is equal to the size of base declaration that is + // non-contiguous. + for (unsigned I = 0, L = 0, E = Info.NumberOfPtrs; I < E; ++I) { + if (Dims[I] == 0) + continue; + llvm::APInt Size(/*numBits=*/32, Dims[I]); + QualType ArrayTy = + C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); + Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); + for (unsigned II = 0, EE = Dims[I]; II < EE; ++II) { + unsigned RevIdx = EE - II - 1; + LValue DimsLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); + // Offset + LValue OffsetLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), OffsetFD)); + CGF.EmitStoreOfScalar(Offsets[L][RevIdx], OffsetLVal); + // Count + LValue CountLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), CountFD)); + CGF.EmitStoreOfScalar(Counts[L][RevIdx], CountLVal); + // Stride + LValue StrideLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(Strides[L][RevIdx], StrideLVal); + } + // args[I] = &dims + Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DimsAddr, CGM.Int8PtrTy); + llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.PointersArray, 0, I); + Address PAddr(P, C.getTypeAlignInChars(C.VoidPtrTy)); + CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); + ++L; + } + } +} + /// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. static void emitOffloadingArraysArgument( @@ -10142,6 +10443,7 @@ MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapDimArrayTy Dims; // Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); @@ -10156,6 +10458,7 @@ MappableExprsHandler::MapValuesArrayTy CurPointers; MappableExprsHandler::MapValuesArrayTy CurSizes; MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + MappableExprsHandler::MapDimArrayTy CurDims; MappableExprsHandler::StructRangeInfoTy PartialStruct; // VLA sizes are passed to the outlined region by copy and do not have map @@ -10173,7 +10476,8 @@ // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, - CurSizes, CurMapTypes, PartialStruct); + CurSizes, CurMapTypes, CurDims, + PartialStruct); if (CurBasePointers.empty()) MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, CurPointers, CurSizes, CurMapTypes); @@ -10210,11 +10514,12 @@ // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, - MapTypes); + MapTypes, Dims); TargetDataInfo Info; // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Dims, + Info); emitOffloadingArraysArgument(CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, Info); @@ -10812,13 +11117,19 @@ MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapDimArrayTy Dims; + MappableExprsHandler::MapNonContiguousArrayTy Offsets; + MappableExprsHandler::MapNonContiguousArrayTy Counts; + MappableExprsHandler::MapNonContiguousArrayTy Strides; // Get map clause information. MappableExprsHandler MCHandler(D, CGF); - MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes, Dims, + Offsets, Counts, Strides); // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitTargetDataOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, + Dims, Offsets, Counts, Strides, Info); llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr; @@ -11048,14 +11359,20 @@ MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapDimArrayTy Dims; + MappableExprsHandler::MapNonContiguousArrayTy Offsets; + MappableExprsHandler::MapNonContiguousArrayTy Counts; + MappableExprsHandler::MapNonContiguousArrayTy Strides; // Get map clause information. MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes, Dims, + Offsets, Counts, Strides); TargetDataInfo Info; // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitTargetDataOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, + Dims, Offsets, Counts, Strides, Info); emitOffloadingArraysArgument(CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, Info); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -47,7 +47,8 @@ static const Expr *checkMapClauseExpressionBase( Sema &SemaRef, Expr *E, OMPClauseMappableExprCommon::MappableExprComponentList &CurComponents, - OpenMPClauseKind CKind, bool NoDiagnose); + bool &IsNonContiguous, OpenMPClauseKind CKind, OpenMPDirectiveKind DKind, + bool NoDiagnose); namespace { /// Default data sharing attributes, which can be applied to directive. @@ -3527,7 +3528,10 @@ } if (isOpenMPTargetExecutionDirective(DKind)) { OMPClauseMappableExprCommon::MappableExprComponentList CurComponents; - if (!checkMapClauseExpressionBase(SemaRef, E, CurComponents, OMPC_map, + bool IsNonContiguous = false; + if (!checkMapClauseExpressionBase(SemaRef, E, CurComponents, + IsNonContiguous, OMPC_map, + Stack->getCurrentDirective(), /*NoDiagnose=*/true)) return; const auto *VD = cast( @@ -16350,7 +16354,9 @@ class MapBaseChecker final : public StmtVisitor { Sema &SemaRef; OpenMPClauseKind CKind = OMPC_unknown; + OpenMPDirectiveKind DKind = OMPD_unknown; OMPClauseMappableExprCommon::MappableExprComponentList &Components; + bool &IsNonContiguousRef; bool NoDiagnose = false; const Expr *RelevantExpr = nullptr; bool AllowUnitySizeArraySection = true; @@ -16528,6 +16534,9 @@ // pointer. Otherwise, only unitary sections are accepted. if (NotWhole || IsPointer) AllowWholeSizeArraySection = false; + } else if (DKind == OMPD_target_update && + SemaRef.getLangOpts().OpenMP >= 50) { + IsNonContiguousRef = true; } else if (AllowUnitySizeArraySection && NotUnity) { // A unity or whole array section is not allowed and that is not // compatible with the properties of the current array section. @@ -16620,11 +16629,13 @@ return RelevantExpr; } explicit MapBaseChecker( - Sema &SemaRef, OpenMPClauseKind CKind, + Sema &SemaRef, OpenMPClauseKind CKind, OpenMPDirectiveKind DKind, OMPClauseMappableExprCommon::MappableExprComponentList &Components, - bool NoDiagnose, SourceLocation &ELoc, SourceRange &ERange) - : SemaRef(SemaRef), CKind(CKind), Components(Components), - NoDiagnose(NoDiagnose), ELoc(ELoc), ERange(ERange) {} + bool &IsNonContiguousTargetUpdate, bool NoDiagnose, SourceLocation &ELoc, + SourceRange &ERange) + : SemaRef(SemaRef), CKind(CKind), DKind(DKind), Components(Components), + IsNonContiguousRef(IsNonContiguousTargetUpdate), NoDiagnose(NoDiagnose), + ELoc(ELoc), ERange(ERange) {} }; } // namespace @@ -16635,11 +16646,12 @@ static const Expr *checkMapClauseExpressionBase( Sema &SemaRef, Expr *E, OMPClauseMappableExprCommon::MappableExprComponentList &CurComponents, - OpenMPClauseKind CKind, bool NoDiagnose) { + bool &IsNonContiguousTargetUpdate, OpenMPClauseKind CKind, + OpenMPDirectiveKind DKind, bool NoDiagnose) { SourceLocation ELoc = E->getExprLoc(); SourceRange ERange = E->getSourceRange(); - MapBaseChecker Checker(SemaRef, CKind, CurComponents, NoDiagnose, ELoc, - ERange); + MapBaseChecker Checker(SemaRef, CKind, DKind, CurComponents, + IsNonContiguousTargetUpdate, NoDiagnose, ELoc, ERange); if (Checker.Visit(E->IgnoreParens())) return Checker.getFoundBase(); return nullptr; @@ -17017,6 +17029,8 @@ SmallVector VarBaseDeclarations; // The reference to the user-defined mapper associated with every expression. SmallVector UDMapperList; + // The list of whether the expression is non-contiguous or not + SmallVector IsNonContiguousList; MappableVarListInfo(ArrayRef VarList) : VarList(VarList) { // We have a list of components and base declarations for each entry in the @@ -17113,12 +17127,15 @@ } OMPClauseMappableExprCommon::MappableExprComponentList CurComponents; + bool IsNonContiguousTargetUpdate = false; ValueDecl *CurDeclaration = nullptr; // Obtain the array or member expression bases if required. Also, fill the // components array with all the components identified in the process. const Expr *BE = checkMapClauseExpressionBase( - SemaRef, SimpleExpr, CurComponents, CKind, /*NoDiagnose=*/false); + SemaRef, SimpleExpr, CurComponents, IsNonContiguousTargetUpdate, CKind, + DSAS->getCurrentDirective(), + /*NoDiagnose=*/false); if (!BE) continue; @@ -17141,6 +17158,7 @@ MVLI.VarComponents.back().append(CurComponents.begin(), CurComponents.end()); MVLI.VarBaseDeclarations.push_back(nullptr); + MVLI.IsNonContiguousList.push_back(IsNonContiguousTargetUpdate); continue; } @@ -17318,6 +17336,7 @@ CurComponents.end()); MVLI.VarBaseDeclarations.push_back(isa(BE) ? nullptr : CurDeclaration); + MVLI.IsNonContiguousList.push_back(IsNonContiguousTargetUpdate); } } @@ -17355,11 +17374,11 @@ // We need to produce a map clause even if we don't have variables so that // other diagnostics related with non-existing map clauses are accurate. - return OMPMapClause::Create(Context, Locs, MVLI.ProcessedVarList, - MVLI.VarBaseDeclarations, MVLI.VarComponents, - MVLI.UDMapperList, Modifiers, ModifiersLoc, - MapperIdScopeSpec.getWithLocInContext(Context), - MapperId, MapType, IsMapTypeImplicit, MapLoc); + return OMPMapClause::Create( + Context, Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + MVLI.VarComponents, MVLI.IsNonContiguousList, MVLI.UDMapperList, + Modifiers, ModifiersLoc, MapperIdScopeSpec.getWithLocInContext(Context), + MapperId, MapType, IsMapTypeImplicit, MapLoc); } QualType Sema::ActOnOpenMPDeclareReductionType(SourceLocation TyLoc, @@ -18272,7 +18291,7 @@ return OMPToClause::Create( Context, Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, - MVLI.VarComponents, MVLI.UDMapperList, + MVLI.VarComponents, MVLI.IsNonContiguousList, MVLI.UDMapperList, MapperIdScopeSpec.getWithLocInContext(Context), MapperId); } @@ -18289,7 +18308,7 @@ return OMPFromClause::Create( Context, Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, - MVLI.VarComponents, MVLI.UDMapperList, + MVLI.VarComponents, MVLI.IsNonContiguousList, MVLI.UDMapperList, MapperIdScopeSpec.getWithLocInContext(Context), MapperId); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -12599,6 +12599,13 @@ AssociatedExpr, AssociatedDecl)); } C->setComponents(Components, ListSizes); + + SmallVector ListNonContiguous; + ListNonContiguous.reserve(TotalLists); + for (unsigned i = 0; i < TotalLists; ++i) { + ListNonContiguous.push_back(Record.readBool()); + } + C->setNonContiguousLists(ListNonContiguous); } void OMPClauseReader::VisitOMPFromClause(OMPFromClause *C) { @@ -12649,6 +12656,13 @@ AssociatedExpr, AssociatedDecl)); } C->setComponents(Components, ListSizes); + + SmallVector ListNonContiguous; + ListNonContiguous.reserve(TotalLists); + for (unsigned i = 0; i < TotalLists; ++i) { + ListNonContiguous.push_back(Record.readBool()); + } + C->setNonContiguousLists(ListNonContiguous); } void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6574,6 +6574,8 @@ Record.AddStmt(M.getAssociatedExpression()); Record.AddDeclRef(M.getAssociatedDeclaration()); } + for (auto NC : C->non_contiguous_lists()) + Record.push_back(NC); } void OMPClauseWriter::VisitOMPFromClause(OMPFromClause *C) { @@ -6598,6 +6600,8 @@ Record.AddStmt(M.getAssociatedExpression()); Record.AddDeclRef(M.getAssociatedDeclaration()); } + for (auto NC : C->non_contiguous_lists()) + Record.push_back(NC); } void OMPClauseWriter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) { diff --git a/clang/test/OpenMP/target_update_ast_print.cpp b/clang/test/OpenMP/target_update_ast_print.cpp --- a/clang/test/OpenMP/target_update_ast_print.cpp +++ b/clang/test/OpenMP/target_update_ast_print.cpp @@ -5,6 +5,14 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + +// RUN: %clang_cc1 -DOMP5 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s --check-prefix=OMP5 +// RUN: %clang_cc1 -DOMP5 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s --check-prefix=OMP5 + +// RUN: %clang_cc1 -DOMP5 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s --check-prefix=OMP5 +// RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s --check-prefix=OMP5 // expected-no-diagnostics #ifndef HEADER @@ -20,23 +28,64 @@ #pragma omp target update to(([a][targ])p, a) if(l>5) device(l) nowait depend(inout:l) #pragma omp target update from(b, ([a][targ])p) if(l<5) device(l-1) nowait depend(inout:l) + +#ifdef OMP5 + U marr[10][10][10]; +#pragma omp target update to(marr[2][0:2][0:2]) + +#pragma omp target update from(marr[2][0:2][0:2]) + +#pragma omp target update to(marr[:][0:2][0:2]) + +#pragma omp target update from(marr[:][0:2][0:2]) + +#pragma omp target update to(marr[:][:l][l:]) + +#pragma omp target update from(marr[:][:l][l:]) + +#pragma omp target update to(marr[:2][:1][:]) + +#pragma omp target update from(marr[:2][:1][:]) + +#pragma omp target update to(marr[:2][:][:1]) + +#pragma omp target update from(marr[:2][:][:1]) + +#pragma omp target update to(marr[:2][:][1:]) + +#pragma omp target update from(marr[:2][:][1:]) + +#pragma omp target update to(marr[:1][3:2][:2]) + +#pragma omp target update from(marr[:1][3:2][:2]) + +#pragma omp target update to(marr[:1][:2][0]) + +#pragma omp target update from(marr[:1][:2][0]) + +// OMP5: marr[10][10][10]; +// OMP5-NEXT: #pragma omp target update to(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:][:l][l:]) +// OMP5-NEXT: #pragma omp target update from(marr[:][:l][l:]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:1][:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:1][:]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][:2][0]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][:2][0]) +#endif + return a + targ + (T)b; } // CHECK: static T a, *p; // CHECK-NEXT: U b; -// CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l){{$}} -// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) -// CHECK: static int a, *p; -// CHECK-NEXT: float b; -// CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l) -// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) -// CHECK: static char a, *p; -// CHECK-NEXT: float b; -// CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l) -// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) int main(int argc, char **argv) { static int a; @@ -50,6 +99,40 @@ // CHECK-NEXT: #pragma omp target update to(a) if(f > 0.) device(n) nowait depend(in : n) #pragma omp target update from(f) if(f<0.0) device(n+1) nowait depend(in:n) // CHECK-NEXT: #pragma omp target update from(f) if(f < 0.) device(n + 1) nowait depend(in : n) + +#ifdef OMP5 + float marr[10][10][10]; +// OMP5: marr[10][10][10]; +#pragma omp target update to(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[2][0:2][0:2]) +#pragma omp target update from(marr[2][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[2][0:2][0:2]) +#pragma omp target update to(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:][0:2][0:2]) +#pragma omp target update from(marr[:][0:2][0:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:][0:2][0:2]) +#pragma omp target update to(marr[:][:n][n:]) +// OMP5: #pragma omp target update to(marr[:][:n][n:]) +#pragma omp target update from(marr[:2][:1][:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:1][:]) +#pragma omp target update to(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][:1]) +#pragma omp target update from(marr[:2][:][:1]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][:1]) +#pragma omp target update to(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update to(marr[:2][:][1:]) +#pragma omp target update from(marr[:2][:][1:]) +// OMP5-NEXT: #pragma omp target update from(marr[:2][:][1:]) +#pragma omp target update to(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][3:2][:2]) +#pragma omp target update from(marr[:1][3:2][:2]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][3:2][:2]) +#pragma omp target update to(marr[:1][:2][0]) +// OMP5-NEXT: #pragma omp target update to(marr[:1][:2][0]) +#pragma omp target update from(marr[:1][:2][0]) +// OMP5-NEXT: #pragma omp target update from(marr[:1][:2][0]) +#endif + return foo(argc, f) + foo(argv[0][0], f) + a; } diff --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp --- a/clang/test/OpenMP/target_update_codegen.cpp +++ b/clang/test/OpenMP/target_update_codegen.cpp @@ -1059,5 +1059,142 @@ #pragma omp target update from(([sa][5])f) } +#endif + +///==========================================================================/// +// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-64 +// RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-64 +// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-32 +// RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK19 --check-prefix CK19-32 + +// RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// SIMD-ONLY19-NOT: {{__kmpc|__tgt}} +#ifdef CK19 + +// CK19: [[STRUCT_DESCRIPTOR:%.+]] = type { i64, i64, i64 } + +// CK19: [[MSIZE:@.+]] = {{.+}}constant [1 x i64] [i64 3] +// CK19: [[MTYPE:@.+]] = {{.+}}constant [1 x i64] [i64 2081] + +// CK19-LABEL: _Z3foo +void foo(int arg) { + int arr[3][4][5]; + + // CK19: [[DIMS:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]], + // CK19: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [5 x i32]]], [3 x [4 x [5 x i32]]]* [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK19: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [5 x i32]], [4 x [5 x i32]]* [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 + // CK19: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x i32], [5 x i32]* [[ARRAY_DECAY]], {{.+}} + // CK19: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x i32], [5 x i32]* [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0 + // CK19: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds {{.+}}, {{.+}}* [[ARRAY_DECAY_2]], {{.+}} 1 + // CK19: [[LEN:%.+]] = sub nuw i64 4, [[ARG_ADDR:%.+]] + // CK19: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i{{.+}} 0, i{{.+}} 0 + // CK19: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i{{.+}} 0, i{{.+}} 0 + // CK19: [[DIM_1:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 0 + // CK19: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 0 + // CK19: store i64 0, i64* [[OFFSET]], + // CK19: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 1 + // CK19: store i64 2, i64* [[COUNT]], + // CK19: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 2 + // CK19: store i64 80, i64* [[STRIDE]], + // CK19: [[DIM_2:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 1 + // CK19: [[OFFSET_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 0 + // CK19: store i64 [[ARG:%.+]], i64* [[OFFSET_2]], + // CK19: [[COUNT_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 1 + // CK19: store i64 [[LEN]], i64* [[COUNT_2]], + // CK19: [[STRIDE_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 2 + // CK19: store i64 20, i64* [[STRIDE_2]], + // CK19: [[DIM_3:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 2 + // CK19: [[OFFSET_3:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 0 + // CK19: store i64 1, i64* [[OFFSET_3]], + // CK19: [[COUNT_3:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 1 + // CK19: store i64 4, i64* [[COUNT_3]], + // CK19: [[STRIDE_3:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_3]], {{.+}} 0, {{.+}} 2 + // CK19: store i64 4, i64* [[STRIDE_3]], + + // CK19-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MSIZE]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE]]{{.+}}) + // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK19-DAG: [[PC0:%.+]] = bitcast [3 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]] to i8* + // CK19-DAG: [[PTRS:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i32 0, i32 0 + // CK19-DAG: store i8* [[PC0]], i8** [[PTRS]], + +#pragma omp target update to(arr[0:2][arg:][1:4]) + {++arg;} +} + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK20 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-64 +// RUN: %clang_cc1 -DCK20 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-64 +// RUN: %clang_cc1 -DCK20 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-32 +// RUN: %clang_cc1 -DCK20 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK20 --check-prefix CK20-32 + +// RUN: %clang_cc1 -DCK20 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK20 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK20 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// RUN: %clang_cc1 -DCK20 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s +// SIMD-ONLY19-NOT: {{__kmpc|__tgt}} +#ifdef CK20 + +struct ST { + int a; + double *b; +}; + +// CK20: [[STRUCT_ST:%.+]] = type { i32, double* } +// CK20: [[STRUCT_DESCRIPTOR:%.+]] = type { i64, i64, i64 } + +// CK20: [[MSIZE:@.+]] = {{.+}}constant [1 x i64] [i64 2] +// CK20: [[MTYPE:@.+]] = {{.+}}constant [1 x i64] [i64 2081] + +// CK20-LABEL: _Z3foo +void foo(int arg) { + ST arr[3][4]; + // CK20: [[DIMS:%.+]] = alloca [2 x [[STRUCT_DESCRIPTOR]]], + // CK20: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [[STRUCT_ST]]]], [3 x [4 x [[STRUCT_ST]]]]* [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK20: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [[STRUCT_ST]]], [4 x [[STRUCT_ST]]]* [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 + // CK20: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [[STRUCT_ST]], [[STRUCT_ST]]* [[ARRAY_DECAY]], {{.+}} + // CK20: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], {{.+}} 0, {{.+}} 0 + // CK20: [[BPC:%.+]] = bitcast i8** [[BP0]] to [3 x [4 x [[STRUCT_ST]]]]** + // CK20: store [3 x [4 x [[STRUCT_ST]]]]* [[ARR]], [3 x [4 x [[STRUCT_ST]]]]** [[BPC]], + // CK20: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], {{.+}} 0, {{.+}} 0 + // CK20: [[PC:%.+]] = bitcast i8** [[P0]] to [[STRUCT_ST]]** + // CK20: store [[STRUCT_ST]]* [[ARRAY_IDX_1]], [[STRUCT_ST]]** [[PC]], + // CK20: [[DIM_1:%.+]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR]]], [2 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 0 + // CK20: [[OFFSET:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 0 + // CK20: store i64 0, i64* [[OFFSET]], + // CK20: [[COUNT:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 1 + // CK20: store i64 2, i64* [[COUNT]], + // CK20: [[STRIDE:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_1]], {{.+}} 0, {{.+}} 2 + // CK20: store i64 {{32|64}}, i64* [[STRIDE]], + // CK20: [[DIM_2:%.+]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR]]], [2 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]], {{.+}} 0, {{.+}} 1 + // CK20: [[OFFSET_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 0 + // CK20: store i64 1, i64* [[OFFSET_2]], + // CK20: [[COUNT_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 1 + // CK20: store i64 4, i64* [[COUNT_2]], + // CK20: [[STRIDE_2:%.+]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], [[STRUCT_DESCRIPTOR]]* [[DIM_2]], {{.+}} 0, {{.+}} 2 + // CK20: store i64 {{8|16}}, i64* [[STRIDE_2]], + // CK20-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MSIZE]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE]]{{.+}}) + // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK20-DAG: [[PC0:%.+]] = bitcast [2 x [[STRUCT_DESCRIPTOR]]]* [[DIMS]] to i8* + // CK20-DAG: [[PTRS:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i32 0, i32 0 + // CK20-DAG: store i8* [[PC0]], i8** [[PTRS]], + +#pragma omp target update to(arr[0:2][1:4]) + {++arg;} +} + #endif #endif