diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2568,7 +2568,11 @@ */ CXCursor_OMPScanDirective = 287, - CXCursor_LastStmt = CXCursor_OMPScanDirective, + /** OpenMP tile directive. + */ + CXCursor_OMPTileDirective = 288, + + CXCursor_LastStmt = CXCursor_OMPTileDirective, /** * Cursor that represents the translation unit itself. diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -792,6 +792,100 @@ } }; +/// This represents the 'sizes' clause in the '#pragma omp tile' directive. +/// +/// \code +/// #pragma omp tile sizes(5,5) +/// for (int i = 0; i < 64; ++i) +/// for (int j = 0; j < 64; ++j) +/// \endcode +class OMPSizesClause final + : public OMPClause, + private llvm::TrailingObjects { + friend class OMPClauseReader; + friend class llvm::TrailingObjects; + + /// Location of '('. + SourceLocation LParenLoc; + + /// Number of tile sizes in the clause. + unsigned NumSizes; + + /// Build an empty clause. + explicit OMPSizesClause(int NumSizes) + : OMPClause(llvm::omp::OMPC_sizes, SourceLocation(), SourceLocation()), + NumSizes(NumSizes) {} + +public: + /// Build a 'sizes' AST node. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the 'sizes' identifier. + /// \param LParenLoc Location of '('. + /// \param EndLoc Location of ')'. + /// \param Sizes Content of the clause. + static OMPSizesClause *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc, + ArrayRef Sizes); + + /// Build an empty 'sizes' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param Sizes Number of items in the clause. + static OMPSizesClause *CreateEmpty(const ASTContext &C, unsigned NumSizes); + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns the number of list items. + unsigned getNumSizes() const { return NumSizes; } + + /// Returns the tile size expressions. + MutableArrayRef getSizesRefs() { + return MutableArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumSizes); + } + ArrayRef getSizesRefs() const { + return ArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumSizes); + } + + /// Sets the tile size expressions. + void setSizesRefs(ArrayRef VL) { + assert(VL.size() == NumSizes); + std::copy(VL.begin(), VL.end(), + static_cast(this) + ->template getTrailingObjects()); + } + + child_range children() { + MutableArrayRef Sizes = getSizesRefs(); + return child_range(reinterpret_cast(Sizes.begin()), + reinterpret_cast(Sizes.end())); + } + const_child_range children() const { + ArrayRef Sizes = getSizesRefs(); + return const_child_range(reinterpret_cast(Sizes.begin()), + reinterpret_cast(Sizes.end())); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_sizes; + } +}; + /// This represents 'collapse' clause in the '#pragma omp ...' /// directive. 
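Note on the OMPSizesClause API added above: the size expressions live in trailing storage and are exposed through getSizesRefs()/getNumSizes(). A minimal sketch of how a client could consume the clause follows; the helper collectTileSizes is hypothetical and not part of this patch, and it assumes a non-dependent context in which the sizes have already been validated as constant expressions.

  #include "clang/AST/ASTContext.h"
  #include "clang/AST/Expr.h"
  #include "clang/AST/OpenMPClause.h"
  #include "llvm/ADT/APSInt.h"
  #include "llvm/ADT/SmallVector.h"

  // Hypothetical helper (not in this patch): fold the tile sizes of an
  // OMPSizesClause to plain integers, assuming each size expression is a
  // non-dependent constant expression.
  static void collectTileSizes(const clang::OMPSizesClause *C,
                               const clang::ASTContext &Ctx,
                               llvm::SmallVectorImpl<uint64_t> &Sizes) {
    for (const clang::Expr *SizeExpr : C->getSizesRefs()) {
      llvm::APSInt Value = SizeExpr->EvaluateKnownConstInt(Ctx);
      Sizes.push_back(Value.getZExtValue());
    }
  }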
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2832,6 +2832,9 @@ DEF_TRAVERSE_STMT(OMPSimdDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPTileDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) @@ -3071,6 +3074,13 @@ return true; } +template +bool RecursiveASTVisitor::VisitOMPSizesClause(OMPSizesClause *C) { + for (Expr *E : C->getSizesRefs()) + TRY_TO(TraverseStmt(E)); + return true; +} + template bool RecursiveASTVisitor::VisitOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -87,6 +87,11 @@ *child_begin() = S; } + /// Returns the number of Stmt*'s trailing the object. + /// + /// This is not the number of elements returned by children(). + unsigned getNumStmts() const { return NumChildren; } + public: /// Iterates over expressions/statements used in the construct. class used_clauses_child_iterator @@ -264,6 +269,8 @@ // // \param RegionKind Component region kind. const CapturedStmt *getCapturedStmt(OpenMPDirectiveKind RegionKind) const { + assert(!isOpenMPLoopTransformationDirective(RegionKind) && + "Loop transformations do not capture"); SmallVector CaptureRegions; getOpenMPCaptureRegions(CaptureRegions, getDirectiveKind()); assert(std::any_of( @@ -281,6 +288,9 @@ /// Get innermost captured statement for the construct. CapturedStmt *getInnermostCapturedStmt() { + assert( + !isOpenMPLoopTransformationDirective(getDirectiveKind()) && + "Loop transformations do not capture; consider using ignoreCaptures()"); assert(hasAssociatedStmt() && getAssociatedStmt() && "Must have associated statement."); SmallVector CaptureRegions; @@ -298,6 +308,31 @@ ->getInnermostCapturedStmt(); } + /// Return the associated code without captures. + /// + /// Methods with similar functionality: + /// + /// * IgnoreContainers(true) - Not specific to OpenMP; also ignores + /// containers/captures potentially inside the associated code. + /// + /// * getCapturedStmt(Kind) - Return the capture of the specified kind; + /// requires the captured decl/stmt to exist. + /// + /// * getInnermostCapturedStmt() - Returns the innermost capture containing + /// the associated code; also cannot be used if there is no CapturedStmt, + /// e.g. for loop transformations. + /// + /// * getBody() - In addition to ignoring captures, also skips associated + /// loops. + /// + /// * getStructuredBlock() - For loop-associated directives, returns the + /// body; for others, returns the captured statement. + /// + Stmt *ignoreCaptures(); + const Stmt *ignoreCaptures() const { + return const_cast(this)->ignoreCaptures(); + } + OpenMPDirectiveKind getDirectiveKind() const { return Kind; } static bool classof(const Stmt *S) { @@ -451,6 +486,10 @@ /// for loop when combined with a previous distribute loop in the same pragma /// (e.g. 'distribute parallel for') /// + /// Loop transformations, such as OMPTileDirective, only use + /// AssociatedStmtOffset. Its specific statments/expressions are stored as + /// object members. + /// enum { AssociatedStmtOffset = 0, IterationVariableOffset = 1, @@ -498,6 +537,7 @@ /// Get the counters storage. 
MutableArrayRef getCounters() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast( &(*(std::next(child_begin(), getArraysOffset(getDirectiveKind()))))); return MutableArrayRef(Storage, CollapsedNum); @@ -505,6 +545,7 @@ /// Get the private counters storage. MutableArrayRef getPrivateCounters() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast(&*std::next( child_begin(), getArraysOffset(getDirectiveKind()) + CollapsedNum)); return MutableArrayRef(Storage, CollapsedNum); @@ -512,6 +553,7 @@ /// Get the updates storage. MutableArrayRef getInits() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast( &*std::next(child_begin(), getArraysOffset(getDirectiveKind()) + 2 * CollapsedNum)); @@ -520,6 +562,7 @@ /// Get the updates storage. MutableArrayRef getUpdates() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast( &*std::next(child_begin(), getArraysOffset(getDirectiveKind()) + 3 * CollapsedNum)); @@ -528,6 +571,7 @@ /// Get the final counter updates storage. MutableArrayRef getFinals() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast( &*std::next(child_begin(), getArraysOffset(getDirectiveKind()) + 4 * CollapsedNum)); @@ -536,6 +580,7 @@ /// Get the dependent counters storage. MutableArrayRef getDependentCounters() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast( &*std::next(child_begin(), getArraysOffset(getDirectiveKind()) + 5 * CollapsedNum)); @@ -544,6 +589,7 @@ /// Get the dependent inits storage. MutableArrayRef getDependentInits() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast( &*std::next(child_begin(), getArraysOffset(getDirectiveKind()) + 6 * CollapsedNum)); @@ -552,6 +598,7 @@ /// Get the finals conditions storage. MutableArrayRef getFinalsConditions() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); Expr **Storage = reinterpret_cast( &*std::next(child_begin(), getArraysOffset(getDirectiveKind()) + 7 * CollapsedNum)); @@ -581,6 +628,8 @@ /// Offset to the start of children expression arrays. static unsigned getArraysOffset(OpenMPDirectiveKind Kind) { + if (isOpenMPLoopTransformationDirective(Kind)) + return AssociatedStmtOffset + 1; if (isOpenMPLoopBoundSharingDirective(Kind)) return CombinedDistributeEnd; if (isOpenMPWorksharingDirective(Kind) || isOpenMPTaskLoopDirective(Kind) || @@ -592,6 +641,8 @@ /// Children number. static unsigned numLoopChildren(unsigned CollapsedNum, OpenMPDirectiveKind Kind) { + if (isOpenMPLoopTransformationDirective(Kind)) + return getArraysOffset(Kind); return getArraysOffset(Kind) + 8 * CollapsedNum; // Counters, PrivateCounters, Inits, // Updates, Finals, DependentCounters, @@ -833,6 +884,10 @@ /// the end of the assigned distribute chunk) /// expression UB = min (UB, PrevUB) Expr *PrevEUB; + /// The associated loops. + SmallVector Loops; + /// The body of each associated loop. + SmallVector Bodys; /// Counters Loop counters. SmallVector Counters; /// PrivateCounters Loop counters. 
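The asserts added to the helper-expression accessors above reflect that loop-transformation directives allocate only the associated-statement slot (see the getArraysOffset/numLoopChildren changes in this hunk). Generic code that walks arbitrary loop directives therefore has to skip these accessors for '#pragma omp tile'. A sketch of such a caller (hypothetical, not part of this patch):

  #include "clang/AST/StmtOpenMP.h"
  #include "clang/Basic/OpenMPKinds.h"
  #include "llvm/ADT/STLExtras.h"

  // Hypothetical caller (not in this patch): visit the loop-counter
  // expressions of a loop directive, skipping loop transformations such as
  // '#pragma omp tile', for which no counter/init/update storage exists.
  static void forEachLoopCounter(
      const clang::OMPLoopDirective &D,
      llvm::function_ref<void(const clang::Expr *)> Fn) {
    if (clang::isOpenMPLoopTransformationDirective(D.getDirectiveKind()))
      return; // the accessors above would assert for these directives
    for (const clang::Expr *Counter : D.counters())
      Fn(Counter);
  }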
@@ -898,6 +953,8 @@ DependentCounters.resize(Size); DependentInits.resize(Size); FinalsConditions.resize(Size); + Loops.resize(Size); + Bodys.resize(Size); for (unsigned i = 0; i < Size; ++i) { Counters[i] = nullptr; PrivateCounters[i] = nullptr; @@ -907,6 +964,8 @@ DependentCounters[i] = nullptr; DependentInits[i] = nullptr; FinalsConditions[i] = nullptr; + Loops[i] = nullptr; + Bodys[i] = nullptr; } PreInits = nullptr; DistCombinedFields.LB = nullptr; @@ -925,37 +984,48 @@ unsigned getCollapsedNumber() const { return CollapsedNum; } Expr *getIterationVariable() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return const_cast(reinterpret_cast( *std::next(child_begin(), IterationVariableOffset))); } Expr *getLastIteration() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return const_cast(reinterpret_cast( *std::next(child_begin(), LastIterationOffset))); } Expr *getCalcLastIteration() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return const_cast(reinterpret_cast( *std::next(child_begin(), CalcLastIterationOffset))); } Expr *getPreCond() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return const_cast(reinterpret_cast( *std::next(child_begin(), PreConditionOffset))); } Expr *getCond() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return const_cast( reinterpret_cast(*std::next(child_begin(), CondOffset))); } Expr *getInit() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return const_cast( reinterpret_cast(*std::next(child_begin(), InitOffset))); } Expr *getInc() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return const_cast( reinterpret_cast(*std::next(child_begin(), IncOffset))); } const Stmt *getPreInits() const { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); + return *std::next(child_begin(), PreInitsOffset); + } + Stmt *getPreInits() { + assert(!isOpenMPLoopTransformationDirective(getDirectiveKind())); return *std::next(child_begin(), PreInitsOffset); } - Stmt *getPreInits() { return *std::next(child_begin(), PreInitsOffset); } Expr *getIsLastIterVariable() const { assert((isOpenMPWorksharingDirective(getDirectiveKind()) || isOpenMPTaskLoopDirective(getDirectiveKind()) || @@ -1102,13 +1172,32 @@ /// CurStmt. /// \param TryImperfectlyNestedLoops true, if we need to try to look for the /// imperfectly nested loop. - static Stmt *tryToFindNextInnerLoop(Stmt *CurStmt, - bool TryImperfectlyNestedLoops); - static const Stmt *tryToFindNextInnerLoop(const Stmt *CurStmt, - bool TryImperfectlyNestedLoops) { + /// \param PreInits If set, collects additional statemants that have to be + /// executed before the loop; typically variable declarations. + static Stmt * + tryToFindNextInnerLoop(Stmt *CurStmt, bool TryImperfectlyNestedLoops, + llvm::SmallVectorImpl *PreInits = nullptr); + static const Stmt * + tryToFindNextInnerLoop(const Stmt *CurStmt, bool TryImperfectlyNestedLoops, + llvm::SmallVectorImpl *PreInits = nullptr) { return tryToFindNextInnerLoop(const_cast(CurStmt), - TryImperfectlyNestedLoops); + TryImperfectlyNestedLoops, PreInits); + } + + /// Collect the loops associated to this directive, taking loop + /// transformations into account. + /// + /// \param Loops Receives the associated loops. + /// \param PreInits Receives statements that have to be executed before the + /// loop; typically variable declarations. 
+  void collectAssociatedLoops(llvm::SmallVectorImpl<Stmt *> &Loops,
+                              llvm::SmallVectorImpl<Stmt *> &PreInits);
+  void collectAssociatedLoops(llvm::SmallVectorImpl<const Stmt *> &Loops,
+                              llvm::SmallVectorImpl<Stmt *> &PreInits) const {
+    const_cast<OMPLoopDirective *>(this)->collectAssociatedLoops(
+        *reinterpret_cast<llvm::SmallVectorImpl<Stmt *> *>(&Loops), PreInits);
+  }
+
   Stmt *getBody();
   const Stmt *getBody() const {
     return const_cast<OMPLoopDirective *>(this)->getBody();
   }
@@ -1165,6 +1254,7 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == OMPSimdDirectiveClass ||
            T->getStmtClass() == OMPForDirectiveClass ||
+           T->getStmtClass() == OMPTileDirectiveClass ||
            T->getStmtClass() == OMPForSimdDirectiveClass ||
            T->getStmtClass() == OMPParallelForDirectiveClass ||
            T->getStmtClass() == OMPParallelForSimdDirectiveClass ||
@@ -4838,6 +4928,110 @@
   }
 };
 
+/// This represents the '#pragma omp tile' loop transformation directive.
+class OMPTileDirective final
+    : public OMPLoopDirective,
+      private llvm::TrailingObjects<OMPTileDirective, OMPClause *, Stmt *> {
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
+  /// The loop nest after being tiled.
+  Stmt *TransformedStmt = nullptr;
+
+  size_t numTrailingObjects(OverloadToken<OMPClause *>) const {
+    return getNumClauses();
+  }
+
+  size_t numTrailingObjects(OverloadToken<Stmt *>) const {
+    return getNumStmts();
+  }
+
+  explicit OMPTileDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+                            unsigned NumClauses, unsigned NumLoops)
+      : OMPLoopDirective(this, OMPTileDirectiveClass, llvm::omp::OMPD_tile,
+                         StartLoc, EndLoc, NumLoops, NumClauses,
+                         /*NumSpecialChildren=*/0) {}
+
+public:
+  /// Create a new AST node representing '#pragma omp tile'.
+  ///
+  /// \param C Context of the AST.
+  /// \param StartLoc Location of the introducer (e.g. the 'omp' token).
+  /// \param EndLoc Location of the directive's end (e.g. the tok::eod).
+  /// \param Clauses The directive's clauses.
+  /// \param NumLoops Number of associated loops (number of items in the
+  ///        'sizes' clause).
+  /// \param AssociatedStmt The outermost associated loop.
+  /// \param TransformedStmt The loop nest after tiling, or nullptr in
+  ///        dependent contexts.
+  static OMPTileDirective *Create(const ASTContext &C, SourceLocation StartLoc,
+                                  SourceLocation EndLoc,
+                                  ArrayRef<OMPClause *> Clauses,
+                                  unsigned NumLoops, Stmt *AssociatedStmt,
+                                  Stmt *TransformedStmt);
+
+  /// Build an empty '#pragma omp tile' AST node for deserialization.
+  ///
+  /// \param C Context of the AST.
+  /// \param NumClauses Number of clauses to allocate.
+  /// \param NumLoops Number of associated loops to allocate.
+  static OMPTileDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
+                                       unsigned NumLoops);
+
+  unsigned getNumAssociatedLoops() const { return getCollapsedNumber(); }
+
+  /// Gets/sets the associated loops after tiling.
+  ///
+  /// This is in de-sugared format stored as a CompoundStmt.
+  ///
+  /// \code
+  /// {
+  ///   PreInits;
+  ///   for (...)
+  ///     ...
+  /// }
+  /// \endcode
+  ///
+  /// Note that if the generated loops become associated loops of another
+  /// directive, they may need to be hoisted before it.
+  Stmt *getTransformedStmt() const { return TransformedStmt; }
+  void setTransformedStmt(Stmt *S) {
+    assert(!S || isa<CompoundStmt>(S));
+    TransformedStmt = S;
+  }
+
+  /// Return the pre-init statements.
+  ///
+  /// These must be executed before the loop. Typically, these are declarations
+  /// for compiler-introduced variables.
+  auto getPreInits() const {
+    CompoundStmt *C = cast<CompoundStmt>(TransformedStmt);
+    return llvm::make_range(C->body_begin(), C->body_begin() + C->size() - 1);
+  }
+
+  /// Return the transformed (tiled) for-loop.
+ Stmt *getTransformedForStmt() { + CompoundStmt *C = cast(TransformedStmt); + return C->body_back(); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPTileDirectiveClass; + } +}; + +/// Determine the first associated statement of a loop-associated directive. If +/// the statement is a loop transformation directive, uses the result of the +/// loop transformation. +Stmt * +getTopmostAssociatedStructuredBlock(Stmt *S, + llvm::SmallVectorImpl *PreInits); +static inline const Stmt * +getTopmostAssociatedStructuredBlock(const Stmt *S, + llvm::SmallVectorImpl *PreInits) { + return getTopmostAssociatedStructuredBlock(const_cast(S), PreInits); +} + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -330,6 +330,8 @@ // OpenMP def err_omp_more_one_clause : Error< "directive '#pragma omp %0' cannot contain more than one '%1' clause%select{| with '%3' name modifier| with 'source' dependence}2">; +def err_omp_required_clause : Error< + "directive '#pragma omp %0' requires the '%1' clause">; // Static Analyzer Core def err_unknown_analyzer_checker_or_package : Error< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10222,6 +10222,8 @@ "the referenced item is not found in any private clause on the same directive">; def err_omp_stmt_depends_on_loop_counter : Error< "the loop %select{initializer|condition}0 expression depends on the current loop control variable">; +def err_omp_invariant_dependency : Error< + "expected loop invariant expression">; def err_omp_invariant_or_linear_dependency : Error< "expected loop invariant expression or ' * %0 + ' kind of expression">; def err_omp_wrong_dependency_iterator_type : Error< diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -270,6 +270,11 @@ /// functions bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind); +/// Checks if the specified directive is a loop transformation directive. +/// \param DKind Specified directive. +/// \return True iff the directive is a loop transformation. +bool isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind); + /// Return the captured regions of an OpenMP directive. 
void getOpenMPCaptureRegions( llvm::SmallVectorImpl &CaptureRegions, diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -220,6 +220,7 @@ def OMPLoopDirective : StmtNode; def OMPParallelDirective : StmtNode; def OMPSimdDirective : StmtNode; +def OMPTileDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3171,6 +3171,10 @@ OMPClause *ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind, OpenMPClauseKind Kind, bool ParseOnly); + + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. + OMPClause *ParseOpenMPSizesClause(); + /// Parses clause without any additional arguments. /// /// \param Kind Kind of current clause. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10174,6 +10174,12 @@ ActOnOpenMPSimdDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '#pragma omp tile' after parsing of its clauses and + /// the associated statement. + StmtResult + ActOnOpenMPTileDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA); /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. StmtResult @@ -10435,6 +10441,8 @@ ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + + /// Checks correctness of linear modifiers. bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind, SourceLocation LinLoc); @@ -10510,6 +10518,11 @@ OMPClause *ActOnOpenMPSimdlenClause(Expr *Length, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-form 'sizes' clause. + OMPClause *ActOnOpenMPSizesClause(ArrayRef SizeExprs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'collapse' clause. 
OMPClause *ActOnOpenMPCollapseClause(Expr *NumForLoops, SourceLocation StartLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1883,6 +1883,7 @@ // OpenMP directives STMT_OMP_PARALLEL_DIRECTIVE, STMT_OMP_SIMD_DIRECTIVE, + STMT_OMP_TILE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -100,6 +100,7 @@ case OMPC_proc_bind: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -186,6 +187,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -897,6 +899,25 @@ return new (Mem) OMPInReductionClause(N); } +OMPSizesClause *OMPSizesClause::Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, + ArrayRef Sizes) { + OMPSizesClause *Clause = CreateEmpty(C, Sizes.size()); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setLocEnd(EndLoc); + Clause->setSizesRefs(Sizes); + return Clause; +} + +OMPSizesClause *OMPSizesClause::CreateEmpty(const ASTContext &C, + unsigned NumSizes) { + void *Mem = C.Allocate(totalSizeToAlloc(NumSizes)); + return new (Mem) OMPSizesClause(NumSizes); +} + OMPAllocateClause * OMPAllocateClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, Expr *Allocator, @@ -1518,6 +1539,18 @@ OS << ")"; } +void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) { + OS << "sizes("; + bool First = true; + for (auto Size : Node->getSizesRefs()) { + if (!First) + OS << ", "; + Size->printPretty(OS, nullptr, Policy, 0); + First = false; + } + OS << ")"; +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -33,18 +33,55 @@ return !hasAssociatedStmt() || !getAssociatedStmt(); } +Stmt *OMPExecutableDirective::ignoreCaptures() { + SmallVector CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, getDirectiveKind()); + auto Result = getAssociatedStmt(); + for (auto ThisCaptureRegion : CaptureRegions) { + (void)ThisCaptureRegion; + Result = cast(Result)->getCapturedStmt(); + } + return Result; +} + const Stmt *OMPExecutableDirective::getStructuredBlock() const { assert(!isStandaloneDirective() && "Standalone Executable Directives don't have Structured Blocks."); if (auto *LD = dyn_cast(this)) return LD->getBody(); - return getInnermostCapturedStmt()->getCapturedStmt(); + return ignoreCaptures(); } -Stmt *OMPLoopDirective::tryToFindNextInnerLoop(Stmt *CurStmt, - bool TryImperfectlyNestedLoops) { +Stmt *clang::getTopmostAssociatedStructuredBlock( + Stmt *S, llvm::SmallVectorImpl *PreInits) { + assert(S && "Must be a valid statement"); + + while (true) { + S = S->IgnoreContainers(/* IgnoreCaptured */ true); + if (!isa(S) || + !isOpenMPLoopTransformationDirective( + cast(S)->getDirectiveKind())) + break; + + if (auto *D = dyn_cast(S)) { + if (PreInits) { + auto PreInit = D->getPreInits(); + 
PreInits->append(PreInit.begin(), PreInit.end()); + } + S = D->getTransformedForStmt(); + continue; + } + llvm_unreachable("Unhandled loop transformation"); + } + return S; +} + +Stmt *OMPLoopDirective::tryToFindNextInnerLoop( + Stmt *CurStmt, bool TryImperfectlyNestedLoops, + llvm::SmallVectorImpl *PreInits) { Stmt *OrigStmt = CurStmt; - CurStmt = CurStmt->IgnoreContainers(); + CurStmt = getTopmostAssociatedStructuredBlock(CurStmt, PreInits); + // Additional work for imperfectly nested loops, introduced in OpenMP 5.0. if (TryImperfectlyNestedLoops) { if (auto *CS = dyn_cast(CurStmt)) { @@ -85,10 +122,33 @@ return CurStmt; } +void OMPLoopDirective::collectAssociatedLoops( + llvm::SmallVectorImpl &Loops, + llvm::SmallVectorImpl &PreInits) { + Stmt *Body = ignoreCaptures()->IgnoreContainers(); + unsigned NumLoops = getCollapsedNumber(); + assert(NumLoops >= 1); + + // For each nest level, find the loop and its body. + // The topmost loop must not be surrounded by other code. + for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) { + Stmt *Loop = OMPLoopDirective::tryToFindNextInnerLoop( + Body, /*TryImperfectlyNestedLoops=*/(Cnt > 0), &PreInits); + Loops.push_back(Loop); + + // Get body to look next loop in + if (auto *For = dyn_cast(Loop)) { + Body = For->getBody(); + } else if (auto CXXFor = dyn_cast(Loop)) { + Body = CXXFor->getBody(); + } else + llvm_unreachable("Expected canonical for loop or range-based for loop."); + } +} + Stmt *OMPLoopDirective::getBody() { // This relies on the loop form is already checked by Sema. - Stmt *Body = - getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + Stmt *Body = getTopmostAssociatedStructuredBlock(ignoreCaptures(), nullptr); if (auto *For = dyn_cast(Body)) { Body = For->getBody(); } else { @@ -281,6 +341,28 @@ return new (Mem) OMPForDirective(CollapsedNum, NumClauses); } +OMPTileDirective * +OMPTileDirective::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, ArrayRef Clauses, + unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt) { + OMPTileDirective *Dir = CreateEmpty(C, Clauses.size(), NumLoops); + Dir->setLocStart(StartLoc); + Dir->setLocEnd(EndLoc); + Dir->setClauses(Clauses); + Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTransformedStmt(TransformedStmt); + return Dir; +} + +OMPTileDirective *OMPTileDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops) { + void *Mem = C.Allocate(totalSizeToAlloc( + NumClauses, numLoopChildren(NumLoops, OMPD_tile))); + return new (Mem) OMPTileDirective({}, {}, NumClauses, NumLoops); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -647,7 +647,7 @@ } OS << NL; if (!ForceNoStmt && S->hasAssociatedStmt()) - PrintStmt(S->getInnermostCapturedStmt()->getCapturedStmt()); + PrintStmt(S->ignoreCaptures()); } void StmtPrinter::VisitOMPParallelDirective(OMPParallelDirective *Node) { @@ -953,6 +953,11 @@ PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPTileDirective(OMPTileDirective *Node) { + Indent() << "#pragma omp tile"; + PrintOMPExecutableDirective(Node); +} + //===----------------------------------------------------------------------===// // Expr printing methods. 
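For orientation, this is roughly what the transformation exposed by getTransformedStmt()/getPreInits() corresponds to at the source level. The snippet is purely illustrative (function names are invented, and the real transformed statement is built by Sema with compiler-generated variables and stored as the CompoundStmt described in StmtOpenMP.h):

  // Original source using the new directive:
  void scale(float a, float x[128]) {
  #pragma omp tile sizes(4)
    for (int i = 0; i < 128; ++i)
      x[i] *= a;
  }

  // Rough de-sugared equivalent; 128 is a multiple of the tile size, so the
  // intra-tile bound needs no clamping in this example:
  void scale_tiled(float a, float x[128]) {
    for (int i1 = 0; i1 < 128; i1 += 4)  // loop over tiles ("floor" loop)
      for (int i = i1; i < i1 + 4; ++i)  // loop within one tile
        x[i] *= a;
  }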
//===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -461,6 +461,12 @@ Profiler->VisitStmt(C->getSimdlen()); } +void OMPClauseProfiler::VisitOMPSizesClause(const OMPSizesClause *C) { + for (auto E : C->getSizesRefs()) + if (E) + Profiler->VisitExpr(E); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if (C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); @@ -859,6 +865,10 @@ VisitOMPLoopDirective(S); } +void StmtProfiler::VisitOMPTileDirective(const OMPTileDirective *S) { + VisitOMPLoopDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -4918,8 +4918,7 @@ } // Visit associated structured block if any. if (!D->isStandaloneDirective()) - if (CapturedStmt *CS = D->getInnermostCapturedStmt()) { - Stmt *S = CS->getCapturedStmt(); + if (Stmt *S = D->ignoreCaptures()) { if (!isa(S)) addLocalScopeAndDtors(S); if (CFGBlock *R = addStmt(S)) diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -127,6 +127,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -376,6 +377,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -450,7 +452,7 @@ DKind == OMPD_target_teams_distribute || DKind == OMPD_target_teams_distribute_parallel_for || DKind == OMPD_target_teams_distribute_parallel_for_simd || - DKind == OMPD_target_teams_distribute_simd; + DKind == OMPD_target_teams_distribute_simd || DKind == OMPD_tile; } bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { @@ -577,6 +579,10 @@ Kind == OMPD_target_teams_distribute_parallel_for_simd; } +bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { + return DKind == OMPD_tile; +} + void clang::getOpenMPCaptureRegions( SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind) { @@ -660,6 +666,9 @@ case OMPD_distribute_simd: CaptureRegions.push_back(OMPD_unknown); break; + case OMPD_tile: + // loop transformations do not introduce captures. 
+ break; case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6637,6 +6637,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -6952,6 +6953,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -8872,6 +8874,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -9662,6 +9665,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -10313,6 +10317,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -774,6 +774,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -854,6 +855,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -1027,6 +1029,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -1113,6 +1116,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -198,6 +198,9 @@ case Stmt::OMPSimdDirectiveClass: EmitOMPSimdDirective(cast(*S)); break; + case Stmt::OMPTileDirectiveClass: + EmitOMPTileDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -132,6 +132,17 @@ /// of used expression from loop statement. class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + + // Collect loop nests and the pre-inits of associated loops. + SmallVector Loops; + SmallVector AssociatedPreInits; + S.collectAssociatedLoops(Loops, AssociatedPreInits); + + // Emit statements required by nested loop transformations. Has to be done + // before PreCondVars. + for (Stmt *APreInit : AssociatedPreInits) + CGF.EmitStmt(APreInit); + CodeGenFunction::OMPMapVars PreCondVars; llvm::DenseSet EmittedAsPrivate; for (const auto *E : S.counters()) { @@ -156,24 +167,16 @@ } (void)PreCondVars.apply(CGF); // Emit init, __range and __end variables for C++ range loops. 
- const Stmt *Body = - S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); - for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { - Body = OMPLoopDirective::tryToFindNextInnerLoop( - Body, /*TryImperfectlyNestedLoops=*/true); - if (auto *For = dyn_cast(Body)) { - Body = For->getBody(); - } else { - assert(isa(Body) && - "Expected canonical for loop or range-based for loop."); - auto *CXXFor = cast(Body); + for (const Stmt *Body : Loops) { + if (auto *CXXFor = dyn_cast(Body)) { if (const Stmt *Init = CXXFor->getInit()) CGF.EmitStmt(Init); CGF.EmitStmt(CXXFor->getRangeStmt()); CGF.EmitStmt(CXXFor->getEndStmt()); - Body = CXXFor->getBody(); } } + + // Emit captures. if (const auto *PreInits = cast_or_null(S.getPreInits())) { for (const auto *I : PreInits->decls()) CGF.EmitVarDecl(cast(*I)); @@ -1657,7 +1660,8 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, int MaxLevel, int Level = 0) { assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); - const Stmt *SimplifiedS = S->IgnoreContainers(); + const Stmt *SimplifiedS = + getTopmostAssociatedStructuredBlock(S->IgnoreContainers(), nullptr); if (const auto *CS = dyn_cast(SimplifiedS)) { PrettyStackTraceLoc CrashInfo( CGF.getContext().getSourceManager(), CS->getLBracLoc(), @@ -2305,6 +2309,11 @@ checkForLastprivateConditionalUpdate(*this, S); } +void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { + // Emit the de-sugared statement. + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPOuterLoop( bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope, @@ -5153,6 +5162,7 @@ case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3286,6 +3286,7 @@ void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); + void EmitOMPTileDirective(const OMPTileDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPSectionsDirective(const OMPSectionsDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1952,6 +1952,7 @@ break; case OMPD_parallel: case OMPD_simd: + case OMPD_tile: case OMPD_task: case OMPD_taskyield: case OMPD_barrier: @@ -2184,6 +2185,7 @@ LLVM_FALLTHROUGH; case OMPD_parallel: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -2318,6 +2320,11 @@ HasAssociatedStatement = false; } + if (DKind == OMPD_tile && !FirstClauses[unsigned(OMPC_sizes)].getInt()) { + Diag(Loc, diag::err_omp_required_clause) + << getOpenMPDirectiveName(OMPD_tile) << "sizes"; + } + StmtResult AssociatedStmt; if (HasAssociatedStatement) { // The body is a block scope like in Lambdas and Blocks. 
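Taken together, the required-clause check and the duplicate-clause check above give '#pragma omp tile' the following surface behavior. The snippet is illustrative only (it is not taken from the patch's test files); the error wording follows the diagnostics added in this patch.

  void f(int n, int *a) {
  #pragma omp tile                    // error: directive '#pragma omp tile'
                                      //        requires the 'sizes' clause
    for (int i = 0; i < n; ++i)
      a[i] = i;

  #pragma omp tile sizes(2, 2)        // OK: one constant size per associated loop
    for (int i = 0; i < n; ++i)
      for (int j = 0; j < n; ++j)
        a[i] += j;

  #pragma omp tile sizes(2) sizes(4)  // error: cannot contain more than one
                                      //        'sizes' clause
    for (int i = 0; i < n; ++i)
      a[i] = i;
  }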
@@ -2429,6 +2436,37 @@ return !IsCorrect; } +OMPClause *Parser::ParseOpenMPSizesClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SmallVector ValExprs; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + while (true) { + ExprResult Val = ParseConstantExpression(); + if (!Val.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ValExprs.push_back(Val.get()); + + if (Tok.is(tok::r_paren) || Tok.is(tok::annot_pragma_openmp_end)) + break; + + ExpectAndConsume(tok::comma); + } + + T.consumeClose(); + + return Actions.ActOnOpenMPSizesClause( + ValExprs, ClauseNameLoc, T.getOpenLocation(), T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) { SourceLocation Loc = Tok.getLocation(); ConsumeAnyToken(); @@ -2665,6 +2703,15 @@ case OMPC_affinity: Clause = ParseOpenMPVarListClause(DKind, CKind, WrongDirective); break; + case OMPC_sizes: + if (!FirstClause) { + Diag(Tok, diag::err_omp_more_one_clause) + << getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0; + ErrorFound = true; + } + + Clause = ParseOpenMPSizesClause(); + break; case OMPC_uses_allocators: Clause = ParseOpenMPUsesAllocatorClause(DKind); break; diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1460,6 +1460,7 @@ case Stmt::OMPSectionDirectiveClass: case Stmt::OMPSectionsDirectiveClass: case Stmt::OMPSimdDirectiveClass: + case Stmt::OMPTileDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -35,6 +35,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/PointerEmbeddedInt.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include @@ -3258,7 +3259,8 @@ void VisitSubCaptures(OMPExecutableDirective *S) { // Check implicitly captured variables. - if (!S->hasAssociatedStmt() || !S->getAssociatedStmt()) + if (!S->hasAssociatedStmt() || !S->getAssociatedStmt() || + isOpenMPLoopTransformationDirective(S->getDirectiveKind())) return; visitSubCaptures(S->getInnermostCapturedStmt()); // Try to capture inner this->member references to generate correct mappings @@ -3594,6 +3596,12 @@ // Check implicitly captured variables. VisitSubCaptures(S); } + + void VisitOMPTileDirective(OMPTileDirective *S) { + // #pragma omp tile does not introduce data sharing. + VisitStmt(S); + } + void VisitStmt(Stmt *S) { for (Stmt *C : S->children()) { if (C) { @@ -4029,6 +4037,9 @@ AlwaysInlineAttr::Keyword_forceinline)); break; } + case OMPD_tile: + // loop transformations have no captures. 
+ break; case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: @@ -4265,6 +4276,9 @@ } } DSAStack->setForceVarCapturing(/*V=*/false); + } else if (isOpenMPLoopTransformationDirective( + DSAStack->getCurrentDirective())) { + assert(CaptureRegions.empty()); } else if (CaptureRegions.size() > 1 || CaptureRegions.back() != OMPD_unknown) { if (auto *C = OMPClauseWithPreInit::get(Clause)) @@ -4939,7 +4953,8 @@ VarsWithInheritedDSAType VarsWithInheritedDSA; bool ErrorFound = false; ClausesWithImplicit.append(Clauses.begin(), Clauses.end()); - if (AStmt && !CurContext->isDependentContext()) { + if (AStmt && !CurContext->isDependentContext() && + !isOpenMPLoopTransformationDirective(Kind)) { assert(isa(AStmt) && "Captured statement expected"); // Check default data sharing attributes for referenced variables. @@ -5032,6 +5047,10 @@ if (LangOpts.OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; + case OMPD_tile: + Res = ActOnOpenMPTileDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, + VarsWithInheritedDSA); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -5379,6 +5398,7 @@ case OMPC_collapse: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_default: case OMPC_proc_bind: case OMPC_private: @@ -6214,6 +6234,10 @@ namespace { /// Iteration space of a single for loop. struct LoopIterationSpace final { + /// The loop that has been analyzed. + Stmt *Loop = nullptr; + /// The loop's body. + Stmt *Body = nullptr; /// True if the condition operator is the strict compare operator (<, > or /// !=). bool IsStrictCompare = false; @@ -6266,6 +6290,10 @@ class OpenMPIterationSpaceChecker { /// Reference to Sema. Sema &SemaRef; + /// Is the loop associated directive capturing its referenced variables? + bool Capturing; + /// Does the loop associated directive support non-rectangular loops? + bool SupportsNonRectangular; /// Data-sharing stack. DSAStackTy &Stack; /// A location for diagnostics (when there is no some better location). @@ -6314,16 +6342,18 @@ Expr *Condition = nullptr; public: - OpenMPIterationSpaceChecker(Sema &SemaRef, DSAStackTy &Stack, + OpenMPIterationSpaceChecker(Sema &SemaRef, bool Capturing, + bool SupportsNonRectangular, DSAStackTy &Stack, SourceLocation DefaultLoc) - : SemaRef(SemaRef), Stack(Stack), DefaultLoc(DefaultLoc), - ConditionLoc(DefaultLoc) {} + : SemaRef(SemaRef), Capturing(Capturing), + SupportsNonRectangular(SupportsNonRectangular), Stack(Stack), + DefaultLoc(DefaultLoc), ConditionLoc(DefaultLoc) {} /// Check init-expr for canonical loop form and save loop counter /// variable - #Var and its initialization value - #LB. bool checkAndSetInit(Stmt *S, bool EmitDiags = true); /// Check test-expr for canonical form, save upper-bound (#UB), flags /// for less/greater and for strict/non-strict comparison. - bool checkAndSetCond(Expr *S); + bool checkAndSetCond(Expr *S, bool EmitDiags = true); /// Check incr-expr for canonical loop form and return true if it /// does not conform, otherwise save loop step (#Step). bool checkAndSetInc(Expr *S); @@ -6392,7 +6422,7 @@ bool EmitDiags); /// Helper to set upper bound. bool setUB(Expr *NewUB, llvm::Optional LessOp, bool StrictOp, - SourceRange SR, SourceLocation SL); + SourceRange SR, SourceLocation SL, bool EmitDiags = true); /// Helper to set loop increment. 
bool setStep(Expr *NewStep, bool Subtract); }; @@ -6432,7 +6462,7 @@ bool OpenMPIterationSpaceChecker::setUB(Expr *NewUB, llvm::Optional LessOp, bool StrictOp, SourceRange SR, - SourceLocation SL) { + SourceLocation SL, bool EmitDiags) { // State consistency checking to ensure correct usage. assert(LCDecl != nullptr && LB != nullptr && UB == nullptr && Step == nullptr && !TestIsLessOp && !TestIsStrictOp); @@ -6444,7 +6474,8 @@ TestIsStrictOp = StrictOp; ConditionSrcRange = SR; ConditionLoc = SL; - CondDependOnLC = doesDependOnLoopCounter(UB, /*IsInitializer=*/false); + if (EmitDiags) + CondDependOnLC = doesDependOnLoopCounter(UB, /*IsInitializer=*/false); return false; } @@ -6521,6 +6552,7 @@ const ValueDecl *DepDecl = nullptr; const ValueDecl *PrevDepDecl = nullptr; bool IsInitializer = true; + bool SupportsNonRectangular; unsigned BaseLoopId = 0; bool checkDecl(const Expr *E, const ValueDecl *VD) { if (getCanonicalDecl(VD) == getCanonicalDecl(CurLCDecl)) { @@ -6543,6 +6575,10 @@ SemaRef.Diag(VD->getLocation(), diag::note_previous_decl) << VD; return false; } + if (Data.first && !SupportsNonRectangular) { + SemaRef.Diag(E->getExprLoc(), diag::err_omp_invariant_dependency); + return false; + } if (Data.first && (DepDecl || (PrevDepDecl && getCanonicalDecl(VD) != getCanonicalDecl(PrevDepDecl)))) { @@ -6587,9 +6623,11 @@ } explicit LoopCounterRefChecker(Sema &SemaRef, DSAStackTy &Stack, const ValueDecl *CurLCDecl, bool IsInitializer, - const ValueDecl *PrevDepDecl = nullptr) + const ValueDecl *PrevDepDecl = nullptr, + bool SupportsNonRectangular = true) : SemaRef(SemaRef), Stack(Stack), CurLCDecl(CurLCDecl), - PrevDepDecl(PrevDepDecl), IsInitializer(IsInitializer) {} + PrevDepDecl(PrevDepDecl), IsInitializer(IsInitializer), + SupportsNonRectangular(SupportsNonRectangular) {} unsigned getBaseLoopId() const { assert(CurLCDecl && "Expected loop dependency."); return BaseLoopId; @@ -6606,7 +6644,7 @@ bool IsInitializer) { // Check for the non-rectangular loops. LoopCounterRefChecker LoopStmtChecker(SemaRef, Stack, LCDecl, IsInitializer, - DepDecl); + DepDecl, SupportsNonRectangular); if (LoopStmtChecker.Visit(S)) { DepDecl = LoopStmtChecker.getDepDecl(); return LoopStmtChecker.getBaseLoopId(); @@ -6721,7 +6759,7 @@ return nullptr; } -bool OpenMPIterationSpaceChecker::checkAndSetCond(Expr *S) { +bool OpenMPIterationSpaceChecker::checkAndSetCond(Expr *S, bool EmitDiags) { // Check test-expr for canonical form, save upper-bound UB, flags for // less/greater and for strict/non-strict comparison. // OpenMP [2.9] Canonical loop form. Test-expr may be one of the following: @@ -6743,17 +6781,18 @@ return setUB(BO->getRHS(), (BO->getOpcode() == BO_LT || BO->getOpcode() == BO_LE), (BO->getOpcode() == BO_LT || BO->getOpcode() == BO_GT), - BO->getSourceRange(), BO->getOperatorLoc()); + BO->getSourceRange(), BO->getOperatorLoc(), EmitDiags); if (getInitLCDecl(BO->getRHS()) == LCDecl) return setUB(BO->getLHS(), (BO->getOpcode() == BO_GT || BO->getOpcode() == BO_GE), (BO->getOpcode() == BO_LT || BO->getOpcode() == BO_GT), - BO->getSourceRange(), BO->getOperatorLoc()); + BO->getSourceRange(), BO->getOperatorLoc(), EmitDiags); } else if (IneqCondIsCanonical && BO->getOpcode() == BO_NE) - return setUB( - getInitLCDecl(BO->getLHS()) == LCDecl ? BO->getRHS() : BO->getLHS(), - /*LessOp=*/llvm::None, - /*StrictOp=*/true, BO->getSourceRange(), BO->getOperatorLoc()); + return setUB(getInitLCDecl(BO->getLHS()) == LCDecl ? 
BO->getRHS() + : BO->getLHS(), + /*LessOp=*/llvm::None, + /*StrictOp=*/true, BO->getSourceRange(), + BO->getOperatorLoc(), EmitDiags); } else if (auto *CE = dyn_cast(S)) { if (CE->getNumArgs() == 2) { auto Op = CE->getOperator(); @@ -6765,11 +6804,11 @@ if (getInitLCDecl(CE->getArg(0)) == LCDecl) return setUB(CE->getArg(1), Op == OO_Less || Op == OO_LessEqual, Op == OO_Less || Op == OO_Greater, CE->getSourceRange(), - CE->getOperatorLoc()); + CE->getOperatorLoc(), EmitDiags); if (getInitLCDecl(CE->getArg(1)) == LCDecl) return setUB(CE->getArg(0), Op == OO_Greater || Op == OO_GreaterEqual, Op == OO_Less || Op == OO_Greater, CE->getSourceRange(), - CE->getOperatorLoc()); + CE->getOperatorLoc(), EmitDiags); break; case OO_ExclaimEqual: if (IneqCondIsCanonical) @@ -6777,7 +6816,7 @@ : CE->getArg(0), /*LessOp=*/llvm::None, /*StrictOp=*/true, CE->getSourceRange(), - CE->getOperatorLoc()); + CE->getOperatorLoc(), EmitDiags); break; default: break; @@ -6902,8 +6941,10 @@ static ExprResult tryBuildCapture(Sema &SemaRef, Expr *Capture, - llvm::MapVector &Captures) { - if (SemaRef.CurContext->isDependentContext() || Capture->containsErrors()) + llvm::MapVector &Captures, + bool Capturing = true) { + if (!Capturing || SemaRef.CurContext->isDependentContext() || + Capture->containsErrors()) return Capture; if (Capture->isEvaluatable(SemaRef.Context, Expr::SE_AllowSideEffects)) return SemaRef.PerformImplicitConversion( @@ -6924,8 +6965,9 @@ calculateNumIters(Sema &SemaRef, Scope *S, SourceLocation DefaultLoc, Expr *Lower, Expr *Upper, Expr *Step, QualType LCTy, bool TestIsStrictOp, bool RoundToStep, - llvm::MapVector &Captures) { - ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures); + llvm::MapVector &Captures, + bool Capturing) { + ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures, Capturing); if (!NewStep.isUsable()) return nullptr; llvm::APSInt LRes, URes, SRes; @@ -7135,8 +7177,10 @@ if (!LBMaxVal.isUsable()) return nullptr; - Expr *LBMin = tryBuildCapture(SemaRef, LBMinVal.get(), Captures).get(); - Expr *LBMax = tryBuildCapture(SemaRef, LBMaxVal.get(), Captures).get(); + Expr *LBMin = + tryBuildCapture(SemaRef, LBMinVal.get(), Captures, Capturing).get(); + Expr *LBMax = + tryBuildCapture(SemaRef, LBMaxVal.get(), Captures, Capturing).get(); if (!LBMin || !LBMax) return nullptr; // LB(MinVal) < LB(MaxVal) @@ -7145,7 +7189,8 @@ if (!MinLessMaxRes.isUsable()) return nullptr; Expr *MinLessMax = - tryBuildCapture(SemaRef, MinLessMaxRes.get(), Captures).get(); + tryBuildCapture(SemaRef, MinLessMaxRes.get(), Captures, Capturing) + .get(); if (!MinLessMax) return nullptr; if (TestIsLessOp.getValue()) { @@ -7215,8 +7260,10 @@ if (!UBMaxVal.isUsable()) return nullptr; - Expr *UBMin = tryBuildCapture(SemaRef, UBMinVal.get(), Captures).get(); - Expr *UBMax = tryBuildCapture(SemaRef, UBMaxVal.get(), Captures).get(); + Expr *UBMin = + tryBuildCapture(SemaRef, UBMinVal.get(), Captures, Capturing).get(); + Expr *UBMax = + tryBuildCapture(SemaRef, UBMaxVal.get(), Captures, Capturing).get(); if (!UBMin || !UBMax) return nullptr; // UB(MinVal) > UB(MaxVal) @@ -7225,7 +7272,8 @@ if (!MinGreaterMaxRes.isUsable()) return nullptr; Expr *MinGreaterMax = - tryBuildCapture(SemaRef, MinGreaterMaxRes.get(), Captures).get(); + tryBuildCapture(SemaRef, MinGreaterMaxRes.get(), Captures, Capturing) + .get(); if (!MinGreaterMax) return nullptr; if (TestIsLessOp.getValue()) { @@ -7248,14 +7296,14 @@ } Expr *UBExpr = TestIsLessOp.getValue() ? UBVal : LBVal; Expr *LBExpr = TestIsLessOp.getValue() ? 
LBVal : UBVal; - Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures).get(); - Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures).get(); + Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures, Capturing).get(); + Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures, Capturing).get(); if (!Upper || !Lower) return nullptr; - ExprResult Diff = - calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, - TestIsStrictOp, /*RoundToStep=*/true, Captures); + ExprResult Diff = calculateNumIters( + SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, TestIsStrictOp, + /*RoundToStep=*/true, Captures, Capturing); if (!Diff.isUsable()) return nullptr; @@ -7317,9 +7365,11 @@ bool UBNonRect = TestIsLessOp.getValue() ? CondDependOnLC.hasValue() : InitDependOnLC.hasValue(); Expr *Lower = - LBNonRect ? LBExpr : tryBuildCapture(SemaRef, LBExpr, Captures).get(); + LBNonRect ? LBExpr + : tryBuildCapture(SemaRef, LBExpr, Captures, Capturing).get(); Expr *Upper = - UBNonRect ? UBExpr : tryBuildCapture(SemaRef, UBExpr, Captures).get(); + UBNonRect ? UBExpr + : tryBuildCapture(SemaRef, UBExpr, Captures, Capturing).get(); if (!Upper || !Lower) return std::make_pair(nullptr, nullptr); @@ -7331,9 +7381,9 @@ // Build minimum/maximum value based on number of iterations. QualType VarType = LCDecl->getType().getNonReferenceType(); - ExprResult Diff = - calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, - TestIsStrictOp, /*RoundToStep=*/false, Captures); + ExprResult Diff = calculateNumIters( + SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, TestIsStrictOp, + /*RoundToStep=*/false, Captures, Capturing); if (!Diff.isUsable()) return std::make_pair(nullptr, nullptr); @@ -7343,7 +7393,7 @@ if (!Diff.isUsable()) return std::make_pair(nullptr, nullptr); - ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures); + ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures, Capturing); if (!NewStep.isUsable()) return std::make_pair(nullptr, nullptr); Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Mul, Diff.get(), NewStep.get()); @@ -7429,8 +7479,8 @@ // Try to build LB UB, where is <, >, <=, or >=. Sema::TentativeAnalysisScope Trap(SemaRef); - ExprResult NewLB = tryBuildCapture(SemaRef, LB, Captures); - ExprResult NewUB = tryBuildCapture(SemaRef, UB, Captures); + ExprResult NewLB = tryBuildCapture(SemaRef, LB, Captures, Capturing); + ExprResult NewUB = tryBuildCapture(SemaRef, UB, Captures, Capturing); if (!NewLB.isUsable() || !NewUB.isUsable()) return nullptr; @@ -7516,16 +7566,17 @@ // Upper - Lower Expr *Upper = TestIsLessOp.getValue() ? Cnt - : tryBuildCapture(SemaRef, LB, Captures).get(); + : tryBuildCapture(SemaRef, LB, Captures, Capturing).get(); Expr *Lower = TestIsLessOp.getValue() - ? tryBuildCapture(SemaRef, LB, Captures).get() + ? 
tryBuildCapture(SemaRef, LB, Captures, Capturing).get() : Cnt; if (!Upper || !Lower) return nullptr; - ExprResult Diff = calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, - Step, VarType, /*TestIsStrictOp=*/false, - /*RoundToStep=*/false, Captures); + ExprResult Diff = + calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, + /*TestIsStrictOp=*/false, + /*RoundToStep=*/false, Captures, Capturing); if (!Diff.isUsable()) return nullptr; @@ -7540,7 +7591,7 @@ if (AssociatedLoops > 0 && isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { DSAStack->loopStart(); - OpenMPIterationSpaceChecker ISC(*this, *DSAStack, ForLoc); + OpenMPIterationSpaceChecker ISC(*this, true, true, *DSAStack, ForLoc); if (!ISC.checkAndSetInit(Init, /*EmitDiags=*/false)) { if (ValueDecl *D = ISC.getLoopDecl()) { auto *VD = dyn_cast(D); @@ -7624,7 +7675,8 @@ Expr *OrderedLoopCountExpr, Sema::VarsWithInheritedDSAType &VarsWithImplicitDSA, llvm::MutableArrayRef ResultIterSpaces, - llvm::MapVector &Captures) { + llvm::MapVector &Captures, bool Capturing, + bool SupportsNonRectangular, bool EmitDiags) { // OpenMP [2.9.1, Canonical Loop Form] // for (init-expr; test-expr; incr-expr) structured-block // for (range-decl: range-expr) structured-block @@ -7636,7 +7688,8 @@ << (CollapseLoopCountExpr != nullptr || OrderedLoopCountExpr != nullptr) << getOpenMPDirectiveName(DKind) << TotalNestedLoopCount << (CurrentNestedLoopCount > 0) << CurrentNestedLoopCount; - if (TotalNestedLoopCount > 1) { + if (TotalNestedLoopCount > 1 && + (CollapseLoopCountExpr || OrderedLoopCountExpr)) { if (CollapseLoopCountExpr && OrderedLoopCountExpr) SemaRef.Diag(DSA.getConstructLoc(), diag::note_omp_collapse_ordered_expr) @@ -7653,15 +7706,18 @@ } return true; } - assert(((For && For->getBody()) || (CXXFor && CXXFor->getBody())) && - "No loop body."); + ResultIterSpaces[CurrentNestedLoopCount].Loop = S; + Stmt *&Body = ResultIterSpaces[CurrentNestedLoopCount].Body; + Body = For ? For->getBody() : CXXFor->getBody(); + assert(Body && "No loop body."); - OpenMPIterationSpaceChecker ISC(SemaRef, DSA, + OpenMPIterationSpaceChecker ISC(SemaRef, Capturing, SupportsNonRectangular, + DSA, For ? For->getForLoc() : CXXFor->getForLoc()); // Check init. Stmt *Init = For ? For->getInit() : CXXFor->getBeginStmt(); - if (ISC.checkAndSetInit(Init)) + if (ISC.checkAndSetInit(Init, EmitDiags)) return true; bool HasErrors = false; @@ -7696,7 +7752,8 @@ assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars"); // Check test-expr. - HasErrors |= ISC.checkAndSetCond(For ? For->getCond() : CXXFor->getCond()); + HasErrors |= ISC.checkAndSetCond(For ? For->getCond() : CXXFor->getCond(), + EmitDiags); // Check incr-expr. HasErrors |= ISC.checkAndSetInc(For ? 
For->getInc() : CXXFor->getInc()); @@ -7712,7 +7769,8 @@ ISC.buildNumIterations(DSA.getCurScope(), ResultIterSpaces, (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || - isOpenMPDistributeDirective(DKind)), + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)), Captures); ResultIterSpaces[CurrentNestedLoopCount].CounterVar = ISC.buildCounterVar(Captures, DSA); @@ -7741,7 +7799,9 @@ ISC.getLoopDependentIdx(); HasErrors |= - (ResultIterSpaces[CurrentNestedLoopCount].PreCond == nullptr || + (ResultIterSpaces[CurrentNestedLoopCount].Loop == nullptr || + ResultIterSpaces[CurrentNestedLoopCount].Body == nullptr || + ResultIterSpaces[CurrentNestedLoopCount].PreCond == nullptr || ResultIterSpaces[CurrentNestedLoopCount].NumIterations == nullptr || ResultIterSpaces[CurrentNestedLoopCount].CounterVar == nullptr || ResultIterSpaces[CurrentNestedLoopCount].PrivateCounterVar == nullptr || @@ -7794,11 +7854,13 @@ static ExprResult buildCounterInit(Sema &SemaRef, Scope *S, SourceLocation Loc, ExprResult VarRef, ExprResult Start, bool IsNonRectangularLB, - llvm::MapVector &Captures) { + llvm::MapVector &Captures, + bool Capturing) { // Build 'VarRef = Start. - ExprResult NewStart = IsNonRectangularLB - ? Start.get() - : tryBuildCapture(SemaRef, Start.get(), Captures); + ExprResult NewStart = + IsNonRectangularLB + ? Start.get() + : tryBuildCapture(SemaRef, Start.get(), Captures, Capturing); if (!NewStart.isUsable()) return ExprError(); if (!SemaRef.Context.hasSameType(NewStart.get()->getType(), @@ -7969,8 +8031,17 @@ Expr *OrderedLoopCountExpr, Stmt *AStmt, Sema &SemaRef, DSAStackTy &DSA, Sema::VarsWithInheritedDSAType &VarsWithImplicitDSA, - OMPLoopDirective::HelperExprs &Built) { - unsigned NestedLoopCount = 1; + OMPLoopDirective::HelperExprs &Built, unsigned MinLoopCount = 1, + bool EmitDiags = true) { + assert(MinLoopCount == 1 || + (!CollapseLoopCountExpr && !OrderedLoopCountExpr)); + + unsigned NestedLoopCount = MinLoopCount; + bool Capturing = !isOpenMPLoopTransformationDirective(DKind); + bool SupportsNonPerfectlyNested = (SemaRef.LangOpts.OpenMP >= 50) && + !isOpenMPLoopTransformationDirective(DKind); + bool SupportsNonRectangular = !isOpenMPLoopTransformationDirective(DKind); + if (CollapseLoopCountExpr) { // Found 'collapse' clause - calculate collapse number. Expr::EvalResult Result; @@ -8004,17 +8075,22 @@ return 1; } } + unsigned NumAssociatedLoops = std::max(OrderedLoopCount, NestedLoopCount); + // This is helper routine for loop directives (e.g., 'for', 'simd', // 'for simd', etc.). llvm::MapVector Captures; - SmallVector IterSpaces( - std::max(OrderedLoopCount, NestedLoopCount)); - Stmt *CurStmt = AStmt->IgnoreContainers(/* IgnoreCaptured */ true); + SmallVector IterSpaces(NumAssociatedLoops); + Stmt *CurStmt = AStmt; for (unsigned Cnt = 0; Cnt < NestedLoopCount; ++Cnt) { - if (checkOpenMPIterationSpace( - DKind, CurStmt, SemaRef, DSA, Cnt, NestedLoopCount, - std::max(OrderedLoopCount, NestedLoopCount), CollapseLoopCountExpr, - OrderedLoopCountExpr, VarsWithImplicitDSA, IterSpaces, Captures)) + CurStmt = OMPLoopDirective::tryToFindNextInnerLoop( + CurStmt, Cnt > 0 && SupportsNonPerfectlyNested); + + if (checkOpenMPIterationSpace(DKind, CurStmt, SemaRef, DSA, Cnt, + NestedLoopCount, NumAssociatedLoops, + CollapseLoopCountExpr, OrderedLoopCountExpr, + VarsWithImplicitDSA, IterSpaces, Captures, + Capturing, SupportsNonRectangular, EmitDiags)) return 0; // Move on to the next nested for loop, or to the loop body. 
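+ // Under OpenMP 4.5 the associated loops must be perfectly nested (see the
+ // reference below); since OpenMP 5.0 intervening code around an inner loop
+ // is allowed, and tryToFindNextInnerLoop looks through it when
+ // SupportsNonPerfectlyNested is set. For illustration only ('f' and 'tmp'
+ // are placeholders):
+ //
+ //   for (int i = 0; i < N; ++i) {
+ //     int tmp = f(i); // intervening code
+ //     for (int j = 0; j < M; ++j)
+ //       body(tmp, j);
+ //   }
+ //
+ // Loop transformation directives still require perfect nesting.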
// OpenMP [2.8.1, simd construct, Restrictions] @@ -8028,14 +8104,16 @@ "Expected canonical for or range-based for loops."); CurStmt = cast(CurStmt)->getBody(); } - CurStmt = OMPLoopDirective::tryToFindNextInnerLoop( - CurStmt, SemaRef.LangOpts.OpenMP >= 50); } for (unsigned Cnt = NestedLoopCount; Cnt < OrderedLoopCount; ++Cnt) { + CurStmt = OMPLoopDirective::tryToFindNextInnerLoop( + CurStmt, SupportsNonPerfectlyNested); + if (checkOpenMPIterationSpace( DKind, CurStmt, SemaRef, DSA, Cnt, NestedLoopCount, std::max(OrderedLoopCount, NestedLoopCount), CollapseLoopCountExpr, - OrderedLoopCountExpr, VarsWithImplicitDSA, IterSpaces, Captures)) + OrderedLoopCountExpr, VarsWithImplicitDSA, IterSpaces, Captures, + Capturing, SupportsNonRectangular, EmitDiags)) return 0; if (Cnt > 0 && IterSpaces[Cnt].CounterVar) { // Handle initialization of captured loop iterator variables. @@ -8056,8 +8134,6 @@ "Expected canonical for or range-based for loops."); CurStmt = cast(CurStmt)->getBody(); } - CurStmt = OMPLoopDirective::tryToFindNextInnerLoop( - CurStmt, SemaRef.LangOpts.OpenMP >= 50); } Built.clear(/* size */ NestedLoopCount); @@ -8192,7 +8268,7 @@ ExprResult CalcLastIteration; if (!IsConstant) { ExprResult SaveRef = - tryBuildCapture(SemaRef, LastIteration.get(), Captures); + tryBuildCapture(SemaRef, LastIteration.get(), Captures, Capturing); LastIteration = SaveRef; // Prepare SaveRef + 1. @@ -8208,7 +8284,8 @@ // Build variables passed into runtime, necessary for worksharing directives. ExprResult LB, UB, IL, ST, EUB, CombLB, CombUB, PrevLB, PrevUB, CombEUB; if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || - isOpenMPDistributeDirective(DKind)) { + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) { // Lower bound variable, initialized with zero. VarDecl *LBDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.lb"); LB = buildDeclRefExpr(SemaRef, LBDecl, VType, InitLoc); @@ -8306,11 +8383,12 @@ { VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv"); IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc); - Expr *RHS = - (isOpenMPWorksharingDirective(DKind) || - isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)) - ? LB.get() - : SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get(); + Expr *RHS = (isOpenMPWorksharingDirective(DKind) || + isOpenMPTaskLoopDirective(DKind) || + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) + ? LB.get() + : SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get(); Init = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, IV.get(), RHS); Init = SemaRef.ActOnFinishFullExpr(Init.get(), /*DiscardedValue*/ false); @@ -8348,7 +8426,8 @@ } ExprResult Cond = (isOpenMPWorksharingDirective(DKind) || - isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)) + isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) ? SemaRef.BuildBinOp(CurScope, CondLoc, UseStrictCompare ? 
BO_LT : BO_LE, IV.get(), BoundUB) @@ -8396,7 +8475,8 @@ // base variables for the update ExprResult NextLB, NextUB, CombNextLB, CombNextUB; if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || - isOpenMPDistributeDirective(DKind)) { + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) { // LB + ST NextLB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, LB.get(), ST.get()); if (!NextLB.isUsable()) @@ -8507,6 +8587,8 @@ Built.DependentCounters.resize(NestedLoopCount); Built.DependentInits.resize(NestedLoopCount); Built.FinalsConditions.resize(NestedLoopCount); + Built.Loops.resize(NestedLoopCount); + Built.Bodys.resize(NestedLoopCount); { // We implement the following algorithm for obtaining the // original loop iteration variable values based on the @@ -8565,27 +8647,28 @@ auto *VD = cast(cast(IS.CounterVar)->getDecl()); DeclRefExpr *CounterVar = buildDeclRefExpr( SemaRef, VD, IS.CounterVar->getType(), IS.CounterVar->getExprLoc(), - /*RefersToCapture=*/true); - ExprResult Init = - buildCounterInit(SemaRef, CurScope, UpdLoc, CounterVar, - IS.CounterInit, IS.IsNonRectangularLB, Captures); + /*RefersToCapture=*/Capturing); + ExprResult Init = buildCounterInit(SemaRef, CurScope, UpdLoc, CounterVar, + IS.CounterInit, IS.IsNonRectangularLB, + Captures, Capturing); if (!Init.isUsable()) { HasErrors = true; break; } ExprResult Update = buildCounterUpdate( SemaRef, CurScope, UpdLoc, CounterVar, IS.CounterInit, Iter, - IS.CounterStep, IS.Subtract, IS.IsNonRectangularLB, &Captures); + IS.CounterStep, IS.Subtract, IS.IsNonRectangularLB, + Capturing ? &Captures : nullptr); if (!Update.isUsable()) { HasErrors = true; break; } // Build final: IS.CounterVar = IS.Start + IS.NumIters * IS.Step - ExprResult Final = - buildCounterUpdate(SemaRef, CurScope, UpdLoc, CounterVar, - IS.CounterInit, IS.NumIterations, IS.CounterStep, - IS.Subtract, IS.IsNonRectangularLB, &Captures); + ExprResult Final = buildCounterUpdate( + SemaRef, CurScope, UpdLoc, CounterVar, IS.CounterInit, + IS.NumIterations, IS.CounterStep, IS.Subtract, IS.IsNonRectangularLB, + Capturing ? &Captures : nullptr); if (!Final.isUsable()) { HasErrors = true; break; @@ -8596,6 +8679,8 @@ break; } // Save results + Built.Loops[Cnt] = IS.Loop; + Built.Bodys[Cnt] = IS.Body; Built.Counters[Cnt] = IS.CounterVar; Built.PrivateCounters[Cnt] = IS.PrivateCounterVar; Built.Inits[Cnt] = Init.get(); @@ -8651,6 +8736,7 @@ Built.DistCombinedFields.DistCond = CombDistCond.get(); Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get(); + assert(Capturing || Captures.empty()); return NestedLoopCount; } @@ -11573,6 +11659,217 @@ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +StmtResult +Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA) { + auto SizesClauses = + OMPExecutableDirective::getClausesOfKind(Clauses); + if (SizesClauses.begin() == SizesClauses.end()) { + // A missing 'sizes' clause is already reported by the parser. + return StmtError(); + } + const OMPSizesClause *SizesClause = *SizesClauses.begin(); + unsigned NumLoops = SizesClause->getNumSizes(); + + // Empty statement should only be possible if there already was an error. + if (!AStmt) + return StmtError(); + + // Verify and diagnose loop nest. 
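+ // Illustrative sketch of the transformation built below; the generated
+ // statements are constructed as AST nodes (nothing is emitted verbatim),
+ // and 'N', 'body' and 'NumIterations' here are placeholders. Assuming a
+ // single associated loop and 'sizes(4)':
+ //
+ //   #pragma omp tile sizes(4)
+ //   for (int i = 0; i < N; ++i)
+ //     body(i);
+ //
+ // becomes a "floor" loop over the tiles and a "tile" loop over the logical
+ // iterations within one tile:
+ //
+ //   for (.floor_0.iv.i = 0; .floor_0.iv.i < NumIterations; .floor_0.iv.i += 4)
+ //     for (.tile_0.iv.i = .floor_0.iv.i;
+ //          .tile_0.iv.i < min(.floor_0.iv.i + 4, NumIterations);
+ //          ++.tile_0.iv.i) {
+ //       int i = 0 + .tile_0.iv.i * 1; // LoopHelper.Updates[0]
+ //       body(i);
+ //     }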
+ OMPLoopDirective::HelperExprs NestHelper; + unsigned NestedLoopCount = checkOpenMPLoop( + OMPD_tile, nullptr, nullptr, AStmt, *this, *DSAStack, VarsWithImplicitDSA, + NestHelper, NumLoops, /*EmitDiags=*/true); + if (NestedLoopCount != NumLoops) + return StmtError(); + + // Delay tiling until the template is completely instantiated. + if (CurContext->isDependentContext()) + return OMPTileDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, nullptr); + + // Derive per-loop logical iteration spaces. + SmallVector LoopHelpers; + LoopHelpers.resize(NumLoops); + for (unsigned i = 0; i < NumLoops; ++i) { + Stmt *LoopStmt = NestHelper.Loops[i]; + VarsWithInheritedDSAType TmpDSA; + unsigned SingleNumLoops = + checkOpenMPLoop(OMPD_tile, nullptr, nullptr, LoopStmt, *this, *DSAStack, + TmpDSA, LoopHelpers[i], 1, /*EmitDiags=*/false); + (void)SingleNumLoops; + assert(SingleNumLoops == 1 && "Expect single loop iteration space"); + } + + // Collection of generated variable declarations. + SmallVector PreInits; + + // Create iteration variables for the generated loops. + SmallVector FloorIndVars; + SmallVector TileIndVars; + SmallVector BodyParts; + FloorIndVars.resize(NumLoops); + TileIndVars.resize(NumLoops); + BodyParts.reserve(2 * NumLoops + 1); + for (unsigned i = 0; i < NumLoops; i += 1) { + OMPLoopDirective::HelperExprs &LoopHelper = LoopHelpers[i]; + assert(LoopHelper.Counters.size() == 1 && + "Expect single-dimensional loop iteration space"); + auto *OrigCntVar = cast(LoopHelper.Counters[0]); + std::string OrigVarName = OrigCntVar->getNameInfo().getAsString(); + DeclRefExpr *IterVarRef = cast(LoopHelper.IterationVarRef); + QualType CntTy = IterVarRef->getType(); + + // Iteration variable for the floor (i.e. outer) loop. + { + std::string FloorCntName = + (Twine(".floor_") + llvm::utostr(i) + ".iv." + OrigVarName).str(); + VarDecl *FloorCntDecl = + buildVarDecl(*this, {}, CntTy, FloorCntName, nullptr, OrigCntVar); + FloorIndVars[i] = FloorCntDecl; + + // Create a declaration statement for the new iteration variable. + Decl *D = FloorCntDecl; + auto *DeclS = + new (Context) DeclStmt(DeclGroupRef::Create(Context, &D, 1), {}, {}); + PreInits.push_back(DeclS); + } + + // Iteration variable for the tile (i.e. inner) loop. + { + std::string TileCntName = + (Twine(".tile_") + llvm::utostr(i) + ".iv." + OrigVarName).str(); + + // Reuse the iteration variable created by checkOpenMPLoop. It is also + // used by the expressions to derive the original iteration variable's + // value from the logical iteration number. + auto *TileCntDecl = cast(IterVarRef->getDecl()); + TileCntDecl->setDeclName(&PP.getIdentifierTable().get(TileCntName)); + TileIndVars[i] = TileCntDecl; + + // Create a declaration statement for the new iteration variable. + Decl *D = TileCntDecl; + auto *DeclS = + new (Context) DeclStmt(DeclGroupRef::Create(Context, &D, 1), {}, {}); + PreInits.push_back(DeclS); + } + + // Statements to set the original iteration variable's value from the + // logical iteration number. + // FIXME: If the innermost body is a loop itself, inserting these + // statements stops it being recognized as a perfectly nested loop (e.g. + // for applying tiling again). If this is the case, sink the expressions + // further into the inner loop. + { + // Declaration of the original loop iteration variable. 
+ Decl *CounterDecl = OrigCntVar->getDecl(); + auto *CounterDeclStmt = new (Context) + DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), {}, {}); + + // Update expression generated by checkOpenMPLoop. + Stmt *Upd = LoopHelper.Updates[0]; + + BodyParts.push_back(CounterDeclStmt); + BodyParts.push_back(Upd); + } + } + + // Once the original iteration values are set, append the innermost body. + BodyParts.push_back(NestHelper.Bodys.back()); + Stmt *Inner = CompoundStmt::Create(Context, BodyParts, AStmt->getBeginLoc(), + AStmt->getEndLoc()); + + // Create tile loops from the inside to the outside. + for (int i = NumLoops - 1; i >= 0; --i) { + OMPLoopDirective::HelperExprs &LoopHelper = LoopHelpers[i]; + Expr *NumIterations = LoopHelper.NumIterations; + auto *OrigCntVar = cast(LoopHelper.Counters[0]); + QualType CntTy = OrigCntVar->getType(); + Expr *DimTileSize = SizesClause->getSizesRefs()[i]; + Scope *CurScope = getCurScope(); + + // Commonly used variables. + DeclRefExpr *TileIV = buildDeclRefExpr(*this, TileIndVars[i], CntTy, {}); + DeclRefExpr *FloorIV = buildDeclRefExpr(*this, FloorIndVars[i], CntTy, {}); + + // For init-statement: .tile.iv = .floor.iv + ExprResult InitStmt = BuildBinOp(CurScope, {}, BO_Assign, TileIV, FloorIV); + if (!InitStmt.isUsable()) + return StmtError(); + + // For cond-expression: .tile.iv < min(.floor.iv + DimTileSize, + // NumIterations) + ExprResult EndOfTile = + BuildBinOp(CurScope, {}, BO_Add, FloorIV, DimTileSize); + if (!EndOfTile.isUsable()) + return StmtError(); + ExprResult IsPartialTile = + BuildBinOp(CurScope, {}, BO_LT, NumIterations, EndOfTile.get()); + if (!IsPartialTile.isUsable()) + return StmtError(); + ExprResult MinTileAndIterSpace = ActOnConditionalOp( + {}, {}, IsPartialTile.get(), NumIterations, EndOfTile.get()); + if (!MinTileAndIterSpace.isUsable()) + return StmtError(); + ExprResult CondExpr = + BuildBinOp(CurScope, {}, BO_LT, TileIV, MinTileAndIterSpace.get()); + if (!CondExpr.isUsable()) + return StmtError(); + + // For incr-statement: ++.tile.iv + ExprResult IncrStmt = BuildUnaryOp(CurScope, {}, UO_PreInc, TileIV); + if (!IncrStmt.isUsable()) + return StmtError(); + + Inner = new (Context) ForStmt(Context, InitStmt.get(), CondExpr.get(), + nullptr, IncrStmt.get(), Inner, {}, {}, {}); + } + + // Create floor loops from the inside to the outside. + for (int i = NumLoops - 1; i >= 0; --i) { + auto &LoopHelper = LoopHelpers[i]; + Expr *NumIterations = LoopHelper.NumIterations; + DeclRefExpr *OrigCntVar = cast(LoopHelper.Counters[0]); + QualType CntTy = OrigCntVar->getType(); + Expr *DimTileSize = SizesClause->getSizesRefs()[i]; + Scope *CurScope = getCurScope(); + + // Commonly used variables. 
+ DeclRefExpr *FloorIV = buildDeclRefExpr(*this, FloorIndVars[i], CntTy, {}); + + // For init-statement: .floor.iv = 0 + auto *IVStart = IntegerLiteral::Create( + Context, llvm::APInt::getNullValue(Context.getTypeSize(CntTy)), CntTy, + {}); + ExprResult InitStmt = BuildBinOp(CurScope, {}, BO_Assign, FloorIV, IVStart); + if (!InitStmt.isUsable()) + return StmtError(); + + // For cond-expression: .floor.iv < NumIterations + ExprResult CondExpr = + BuildBinOp(CurScope, {}, BO_LT, FloorIV, NumIterations); + if (!CondExpr.isUsable()) + return StmtError(); + + // For incr-statement: .floor.iv += DimTileSize + ExprResult IncrStmt = + BuildBinOp(CurScope, {}, BO_AddAssign, FloorIV, DimTileSize); + if (!IncrStmt.isUsable()) + return StmtError(); + + Inner = new (Context) ForStmt(Context, InitStmt.get(), CondExpr.get(), + nullptr, IncrStmt.get(), Inner, {}, {}, {}); + } + + // Create the de-sugared tile loop nest including pre-inits. + PreInits.push_back(Inner); + auto *TransformedStmt = CompoundStmt::Create(Context, PreInits, {}, {}); + + return OMPTileDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, + AStmt, TransformedStmt); +} + OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -11644,6 +11941,7 @@ case OMPC_untied: case OMPC_mergeable: case OMPC_threadprivate: + case OMPC_sizes: case OMPC_allocate: case OMPC_flush: case OMPC_read: @@ -11806,6 +12104,7 @@ // Do not capture if-clause expressions. break; case OMPD_threadprivate: + case OMPD_tile: case OMPD_allocate: case OMPD_taskyield: case OMPD_barrier: @@ -11898,6 +12197,7 @@ case OMPD_end_declare_target: case OMPD_teams: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -11976,6 +12276,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12052,6 +12353,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12129,6 +12431,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -12205,6 +12508,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12280,6 +12584,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12358,6 +12663,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12386,6 +12692,7 @@ case OMPC_proc_bind: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_private: @@ -12816,6 +13123,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_schedule: @@ -12996,6 +13304,22 @@ EndLoc); } +OMPClause *Sema::ActOnOpenMPSizesClause(ArrayRef SizeExprs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + for (auto SizeExpr : SizeExprs) { + ExprResult NumForLoopsResult = + VerifyPositiveIntegerConstantInClause(SizeExpr, OMPC_sizes, true); + if (!NumForLoopsResult.isUsable()) + return nullptr; + } + + DSAStack->setAssociatedLoops(SizeExprs.size()); + 
return OMPSizesClause::Create(Context, StartLoc, LParenLoc, EndLoc, + SizeExprs); +} + OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( OpenMPClauseKind Kind, ArrayRef Argument, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -13043,6 +13367,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_default: @@ -13297,6 +13622,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_schedule: @@ -13574,6 +13900,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_default: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1611,6 +1611,13 @@ return getSema().ActOnOpenMPSimdlenClause(Len, StartLoc, LParenLoc, EndLoc); } + OMPClause *RebuildOMPSizesClause(ArrayRef Sizes, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -8317,7 +8324,7 @@ StmtResult Body; { Sema::CompoundScopeRAII CompoundScope(getSema()); - Stmt *CS = D->getInnermostCapturedStmt()->getCapturedStmt(); + Stmt *CS = D->ignoreCaptures(); Body = getDerived().TransformStmt(CS); } AssociatedStmt = @@ -8370,6 +8377,17 @@ return Res; } +template +StmtResult +TreeTransform::TransformOMPTileDirective(OMPTileDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName, + nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { @@ -9012,6 +9030,31 @@ E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause *TreeTransform::TransformOMPSizesClause(OMPSizesClause *C) { + SmallVector TransformedSizes; + TransformedSizes.reserve(C->getNumSizes()); + bool Changed = false; + for (Expr *E : C->getSizesRefs()) { + if (!E) { + TransformedSizes.push_back(nullptr); + continue; + } + + ExprResult T = getDerived().TransformExpr(E); + if (T.isInvalid()) + return nullptr; + if (E != T.get()) + Changed = true; + TransformedSizes.push_back(T.get()); + } + + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + return RebuildOMPSizesClause(TransformedSizes, C->getBeginLoc(), + C->getLParenLoc(), C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11754,6 +11754,11 @@ case llvm::omp::OMPC_simdlen: C = new (Context) OMPSimdlenClause(); break; + case llvm::omp::OMPC_sizes: { + unsigned NumSizes = Record.readInt(); + C = OMPSizesClause::CreateEmpty(Context, NumSizes); + break; + } case llvm::omp::OMPC_allocator: C = new (Context) OMPAllocatorClause(); break; @@ -12043,6 +12048,12 @@ C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) { + for (auto &E : C->getSizesRefs()) + E = 
Record.readSubExpr(); + C->setLParenLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2269,6 +2269,10 @@ // Two fields (NumClauses and CollapsedNum) were read in ReadStmtFromStream. Record.skipInts(2); VisitOMPExecutableDirective(D); + + if (isOpenMPLoopTransformationDirective(D->getDirectiveKind())) + return; + D->setIterationVariable(Record.readSubExpr()); D->setLastIteration(Record.readSubExpr()); D->setCalcLastIteration(Record.readSubExpr()); @@ -2353,6 +2357,11 @@ VisitOMPLoopDirective(D); } +void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) { + VisitOMPLoopDirective(D); + D->setTransformedStmt(Record.readStmt()); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setTaskReductionRefExpr(Record.readSubExpr()); @@ -3249,6 +3258,13 @@ break; } + case STMT_OMP_TILE_DIRECTIVE: { + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPTileDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } + case STMT_OMP_FOR_DIRECTIVE: { unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6178,6 +6178,13 @@ Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) { + Record.push_back(C->getNumSizes()); + for (auto Size : C->getSizesRefs()) + Record.AddStmt(Size); + Record.AddSourceLocation(C->getLParenLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2167,6 +2167,10 @@ Record.push_back(D->getNumClauses()); Record.push_back(D->getCollapsedNumber()); VisitOMPExecutableDirective(D); + + if (isOpenMPLoopTransformationDirective(D->getDirectiveKind())) + return; + Record.AddStmt(D->getIterationVariable()); Record.AddStmt(D->getLastIteration()); Record.AddStmt(D->getCalcLastIteration()); @@ -2239,6 +2243,12 @@ Code = serialization::STMT_OMP_SIMD_DIRECTIVE; } +void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) { + VisitOMPLoopDirective(D); + Record.AddStmt(D->getTransformedStmt()); + Code = serialization::STMT_OMP_TILE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.AddStmt(D->getTaskReductionRefExpr()); diff --git a/clang/test/Index/openmp-tile.c b/clang/test/Index/openmp-tile.c new file mode 100644 --- /dev/null +++ b/clang/test/Index/openmp-tile.c @@ -0,0 +1,12 @@ +// RUN: c-index-test -test-load-source local %s -fopenmp | FileCheck %s + +void test() { +#pragma omp tile sizes(5) + for (int i = 0; i < 65; i += 1) + ; +} + +// CHECK: openmp-tile.c:4:1: OMPTileDirective= Extent=[4:1 - 4:26] +// CHECK: openmp-tile.c:4:24: IntegerLiteral= 
Extent=[4:24 - 4:25] +// CHECK: openmp-tile.c:5:3: ForStmt= Extent=[5:3 - 6:6] + diff --git a/clang/test/OpenMP/tile_ast_print.cpp b/clang/test/OpenMP/tile_ast_print.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/tile_ast_print.cpp @@ -0,0 +1,164 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...); + + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp tile sizes(5, 5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5,5) + // PRINT: for (int i = 7; i < 17; i += 3) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 3) + // PRINT: for (int j = 7; j < 17; j += 3) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 3) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2(int start1, int start2, int end1, int end2) { + // PRINT: #pragma omp tile sizes(5, 5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5,5) + // PRINT: for (int i = start1; i < end1; i += 1) + // DUMP-NEXT: ForStmt + for (int i = start1; i < end1; i += 1) + // PRINT: for (int j = start2; j < end2; j += 1) + // DUMP: ForStmt + for (int j = start2; j < end2; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo3( +// DUMP-LABEL: FunctionDecl {{.*}} foo3 +void foo3() { + // PRINT: #pragma omp for + // DUMP: OMPForDirective + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for + // PRINT: #pragma omp tile sizes(5) + // DUMP-NEXT: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5) + for (int i = 7; i < 17; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo4( +// DUMP-LABEL: FunctionDecl {{.*}} foo4 +void foo4() { + // PRINT: #pragma omp for collapse(3) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: IntegerLiteral {{.*}} 3 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(3) + // PRINT: #pragma omp tile sizes(5, 5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5, 5) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; 
i < 17; i += 1) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5(int start, int end, int step) { + // PRINT: #pragma omp for collapse(2) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: IntegerLiteral {{.*}} 2 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(2) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: #pragma omp tile sizes(5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP-NEXT: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo6 +template +void foo6(T start, T end) { + // PRINT: #pragma omp tile sizes(Tile) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: DeclRefExpr {{.*}} 'Tile' 'T' + #pragma omp tile sizes(Tile) + // PRINT-NEXT: for (T i = start; i < end; i += Step) + // DUMP-NEXT: ForStmt + for (T i = start; i < end; i += Step) + // PRINT-NEXT: body(i); + // DUMP: CallExpr + body(i); +} + +// Also test instantiating the template. +void tfoo6() { + foo6(0, 42); +} + + +#endif + diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -0,0 +1,981 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...) 
{} + + +// IR-LABEL: define void @foo1( +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], [[TMP2]] +// IR-NEXT: [[SUB1:%.*]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[ADD:%.*]] = add i32 [[SUB1]], [[TMP3]] +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP4]] +// IR-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 +// IR-NEXT: [[ADD3:%.*]] = add i32 [[SUB2]], 1 +// IR-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]] +// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END25:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP5]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4:%.*]] +// IR: for.cond4: +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[SUB5:%.*]] = sub i32 [[TMP7]], [[TMP8]] +// IR-NEXT: [[SUB6:%.*]] = sub i32 [[SUB5]], 1 +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[ADD7:%.*]] = add i32 [[SUB6]], [[TMP9]] +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[DIV8:%.*]] = udiv i32 [[ADD7]], [[TMP10]] +// IR-NEXT: [[SUB9:%.*]] = sub i32 [[DIV8]], 1 +// IR-NEXT: [[ADD10:%.*]] = add i32 [[SUB9]], 1 +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP11]], 5 +// IR-NEXT: [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]] +// IR-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[SUB13:%.*]] = sub i32 [[TMP12]], [[TMP13]] +// IR-NEXT: [[SUB14:%.*]] = sub i32 [[SUB13]], 1 +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[ADD15:%.*]] = add i32 [[SUB14]], [[TMP14]] +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[DIV16:%.*]] = udiv i32 [[ADD15]], [[TMP15]] +// IR-NEXT: [[SUB17:%.*]] = sub i32 [[DIV16]], 1 +// IR-NEXT: [[ADD18:%.*]] = add i32 [[SUB17]], 1 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[ADD18]], [[COND_TRUE]] ], [ [[ADD19]], [[COND_FALSE]] ] +// 
IR-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TMP6]], [[COND]] +// IR-NEXT: br i1 [[CMP20]], label [[FOR_BODY21:%.*]], label [[FOR_END:%.*]] +// IR: for.body21: +// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP17]], i32* [[I]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], [[TMP20]] +// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP18]], [[MUL]] +// IR-NEXT: store i32 [[ADD22]], i32* [[I]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: call void (...) @body(i32 [[TMP21]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4]] +// IR: for.end: +// IR-NEXT: br label [[FOR_INC23:%.*]] +// IR: for.inc23: +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 5 +// IR-NEXT: store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND]] +// IR: for.end25: +// IR-NEXT: ret void +// +extern "C" void foo1(int start, int end, int step) { +#pragma omp tile sizes(5) + for (int i = start; i < end; i += step) + body(i); +} + + +// IR-LABEL: define void @foo2( +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END30:%.*]] +// IR: for.body: +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND1:%.*]] +// IR: for.cond1: +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 4 +// IR-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END27:%.*]] +// IR: for.body3: +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP2]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4:%.*]] +// IR: for.cond4: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 5 +// IR-NEXT: [[CMP5:%.*]] = icmp slt i32 4, [[ADD]] +// IR-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 5 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD6]], [[COND_FALSE]] ] +// IR-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP3]], [[COND]] +// IR-NEXT: br i1 [[CMP7]], label [[FOR_BODY8:%.*]], label [[FOR_END24:%.*]] +// IR: for.body8: +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: store i32 [[TMP6]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND9:%.*]] +// IR: for.cond9: +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP8]], 5 +// IR-NEXT: [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]] +// IR-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// IR: cond.true12: +// IR-NEXT: br label [[COND_END15:%.*]] +// IR: cond.false13: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 5 +// IR-NEXT: br label [[COND_END15]] +// IR: cond.end15: +// IR-NEXT: [[COND16:%.*]] = phi i32 [ 4, [[COND_TRUE12]] ], [ [[ADD14]], [[COND_FALSE13]] ] +// IR-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP7]], [[COND16]] +// IR-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END:%.*]] +// IR: for.body18: +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 3 +// IR-NEXT: [[ADD19:%.*]] = add nsw i32 7, [[MUL]] +// IR-NEXT: store i32 [[ADD19]], i32* [[I]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[MUL20:%.*]] = mul nsw i32 [[TMP11]], 3 +// IR-NEXT: [[ADD21:%.*]] = add nsw i32 7, [[MUL20]] +// IR-NEXT: store i32 [[ADD21]], i32* [[J]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[J]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP12]], i32 [[TMP13]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND9]] +// IR: for.end: +// IR-NEXT: br label [[FOR_INC22:%.*]] +// IR: for.inc22: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC23:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[INC23]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4]] +// IR: for.end24: +// IR-NEXT: br label [[FOR_INC25:%.*]] +// IR: for.inc25: +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: store i32 [[ADD26]], i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND1]] +// IR: for.end27: +// IR-NEXT: br label [[FOR_INC28:%.*]] +// IR: for.inc28: +// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP17]], 5 +// IR-NEXT: store i32 [[ADD29]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND]] +// IR: for.end30: +// IR-NEXT: ret void +// +extern "C" void foo2(int start, int end, int step) { +#pragma omp tile sizes(5,5) + for (int i = 7; i < 17; i+=3) + for (int j = 7; j < 17; j+=3) + body(i,j); +} + + +// IR-LABEL: define void @foo3( +// IR-NEXT: entry: +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @0, i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 0 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP5:%.*]] = 
load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// IR-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 5 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP7]], 4 +// IR-NEXT: br i1 [[CMP3]], label [[FOR_BODY:%.*]], label [[FOR_END33:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4:%.*]] +// IR: for.cond4: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 5 +// IR-NEXT: [[CMP6:%.*]] = icmp slt i32 4, [[ADD5]] +// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// IR: cond.true7: +// IR-NEXT: br label [[COND_END10:%.*]] +// IR: cond.false8: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], 5 +// IR-NEXT: br label [[COND_END10]] +// IR: cond.end10: +// IR-NEXT: [[COND11:%.*]] = phi i32 [ 4, [[COND_TRUE7]] ], [ [[ADD9]], [[COND_FALSE8]] ] +// IR-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP9]], [[COND11]] +// IR-NEXT: br i1 [[CMP12]], label [[FOR_BODY13:%.*]], label [[FOR_END30:%.*]] +// IR: for.body13: +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: store i32 [[TMP12]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND14:%.*]] +// IR: for.cond14: +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP14]], 5 +// IR-NEXT: [[CMP16:%.*]] = icmp slt i32 4, [[ADD15]] +// IR-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// IR: cond.true17: +// IR-NEXT: br label [[COND_END20:%.*]] +// IR: cond.false18: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP15]], 5 +// IR-NEXT: br label [[COND_END20]] +// IR: cond.end20: +// IR-NEXT: [[COND21:%.*]] = phi i32 [ 4, [[COND_TRUE17]] ], [ [[ADD19]], [[COND_FALSE18]] ] +// IR-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP13]], [[COND21]] +// IR-NEXT: br i1 [[CMP22]], label [[FOR_BODY23:%.*]], label [[FOR_END:%.*]] +// IR: for.body23: +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP16]], 3 +// IR-NEXT: [[ADD25:%.*]] = add nsw i32 7, [[MUL24]] +// IR-NEXT: store i32 [[ADD25]], i32* [[I]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[MUL26:%.*]] = mul nsw i32 [[TMP17]], 3 +// IR-NEXT: [[ADD27:%.*]] = add nsw i32 7, [[MUL26]] +// IR-NEXT: store i32 [[ADD27]], i32* [[J]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[J]], align 4 +// IR-NEXT: call void (...) @body(i32 [[TMP18]], i32 [[TMP19]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND14]] +// IR: for.end: +// IR-NEXT: br label [[FOR_INC28:%.*]] +// IR: for.inc28: +// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC29:%.*]] = add nsw i32 [[TMP21]], 1 +// IR-NEXT: store i32 [[INC29]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4]] +// IR: for.end30: +// IR-NEXT: br label [[FOR_INC31:%.*]] +// IR: for.inc31: +// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP22]], 5 +// IR-NEXT: store i32 [[ADD32]], i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND]] +// IR: for.end33: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP23]], 1 +// IR-NEXT: store i32 [[ADD34]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @0, i32 [[TMP0]]) +// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 [[TMP0]]) +// IR-NEXT: ret void +// +extern "C" void foo3() { +#pragma omp for +#pragma omp tile sizes(5,5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +// IR-LABEL: define void @foo4( +// IR-NEXT: entry: +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[K:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @0, i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] 
+// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// IR-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 1 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// IR-NEXT: store i32 [[ADD]], i32* [[K]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP8]], 1 +// IR-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 1 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]] +// IR-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 5 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// IR-NEXT: store i32 [[ADD7]], i32* [[DOTFLOOR_0_IV_I2]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP9]], 4 +// IR-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END38:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I2]], align 4 +// IR-NEXT: store i32 [[TMP10]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND9:%.*]] +// IR: for.cond9: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I2]], align 4 +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 5 +// IR-NEXT: [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]] +// IR-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// IR: cond.true12: +// IR-NEXT: br label [[COND_END15:%.*]] +// IR: cond.false13: +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I2]], align 4 +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP13]], 5 +// IR-NEXT: br label [[COND_END15]] +// IR: cond.end15: +// IR-NEXT: [[COND16:%.*]] = phi i32 [ 4, [[COND_TRUE12]] ], [ [[ADD14]], [[COND_FALSE13]] ] +// IR-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP11]], [[COND16]] +// IR-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END35:%.*]] +// IR: for.body18: +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: store i32 [[TMP14]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND19:%.*]] +// IR: for.cond19: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: [[CMP21:%.*]] = icmp slt i32 4, [[ADD20]] +// IR-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// IR: cond.true22: +// IR-NEXT: br label [[COND_END25:%.*]] +// IR: cond.false23: +// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD24:%.*]] = add nsw i32 
[[TMP17]], 5 +// IR-NEXT: br label [[COND_END25]] +// IR: cond.end25: +// IR-NEXT: [[COND26:%.*]] = phi i32 [ 4, [[COND_TRUE22]] ], [ [[ADD24]], [[COND_FALSE23]] ] +// IR-NEXT: [[CMP27:%.*]] = icmp slt i32 [[TMP15]], [[COND26]] +// IR-NEXT: br i1 [[CMP27]], label [[FOR_BODY28:%.*]], label [[FOR_END:%.*]] +// IR: for.body28: +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL29:%.*]] = mul nsw i32 [[TMP18]], 3 +// IR-NEXT: [[ADD30:%.*]] = add nsw i32 7, [[MUL29]] +// IR-NEXT: store i32 [[ADD30]], i32* [[I]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[MUL31:%.*]] = mul nsw i32 [[TMP19]], 3 +// IR-NEXT: [[ADD32:%.*]] = add nsw i32 7, [[MUL31]] +// IR-NEXT: store i32 [[ADD32]], i32* [[J]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[J]], align 4 +// IR-NEXT: call void (...) @body(i32 [[TMP20]], i32 [[TMP21]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND19]] +// IR: for.end: +// IR-NEXT: br label [[FOR_INC33:%.*]] +// IR: for.inc33: +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC34:%.*]] = add nsw i32 [[TMP23]], 1 +// IR-NEXT: store i32 [[INC34]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND9]] +// IR: for.end35: +// IR-NEXT: br label [[FOR_INC36:%.*]] +// IR: for.inc36: +// IR-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP24]], 5 +// IR-NEXT: store i32 [[ADD37]], i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label [[FOR_COND]] +// IR: for.end38: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD39:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @0, i32 [[TMP0]]) +// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 [[TMP0]]) +// IR-NEXT: ret void +// +extern "C" void foo4() { +#pragma omp for collapse(2) + for (int k = 7; k < 17; k += 3) +#pragma omp tile sizes(5,5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +// IR-LABEL: define void @foo5( +// IR-NEXT: entry: +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTFLOOR_0_IV_I10:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 
4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I13:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I14:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J15:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +// IR-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 4, [[ADD]] +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP]], align 4 +// IR-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 5 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD4]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// IR-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[ADD7:%.*]] = add i32 [[SUB6]], 1 +// IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD7]], 1 +// IR-NEXT: [[CONV:%.*]] = zext i32 [[DIV]] to i64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i64 1, [[CONV]] +// IR-NEXT: [[MUL8:%.*]] = mul nsw i64 [[MUL]], 4 +// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL8]], 1 +// IR-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I10]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: store i32 [[TMP6]], i32* [[DOTTILE_0_IV_I11]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// IR-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @0, i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: [[CMP16:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// IR: cond.true17: +// IR-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: br label [[COND_END19:%.*]] +// IR: cond.false18: +// IR-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: br label [[COND_END19]] +// IR: cond.end19: 
+// IR-NEXT: [[COND20:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE17]] ], [ [[TMP13]], [[COND_FALSE18]] ] +// IR-NEXT: store i64 [[COND20]], i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// IR-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: [[CMP21:%.*]] = icmp sle i64 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP21]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB22:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// IR-NEXT: [[SUB23:%.*]] = sub i32 [[SUB22]], 1 +// IR-NEXT: [[ADD24:%.*]] = add i32 [[SUB23]], 1 +// IR-NEXT: [[DIV25:%.*]] = udiv i32 [[ADD24]], 1 +// IR-NEXT: [[MUL26:%.*]] = mul i32 1, [[DIV25]] +// IR-NEXT: [[MUL27:%.*]] = mul i32 [[MUL26]], 4 +// IR-NEXT: [[CONV28:%.*]] = zext i32 [[MUL27]] to i64 +// IR-NEXT: [[DIV29:%.*]] = sdiv i64 [[TMP17]], [[CONV28]] +// IR-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV29]], 5 +// IR-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] +// IR-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 +// IR-NEXT: store i32 [[CONV32]], i32* [[DOTFLOOR_0_IV_I13]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CONV33:%.*]] = sext i32 [[TMP20]] to i64 +// IR-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB34:%.*]] = sub i32 [[TMP23]], [[TMP24]] +// IR-NEXT: [[SUB35:%.*]] = sub i32 [[SUB34]], 1 +// IR-NEXT: [[ADD36:%.*]] = add i32 [[SUB35]], 1 +// IR-NEXT: [[DIV37:%.*]] = udiv i32 [[ADD36]], 1 +// IR-NEXT: [[MUL38:%.*]] = mul i32 1, [[DIV37]] +// IR-NEXT: [[MUL39:%.*]] = mul i32 [[MUL38]], 4 +// IR-NEXT: [[CONV40:%.*]] = zext i32 [[MUL39]] to i64 +// IR-NEXT: [[DIV41:%.*]] = sdiv i64 [[TMP22]], [[CONV40]] +// IR-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB42:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// IR-NEXT: [[SUB43:%.*]] = sub i32 [[SUB42]], 1 +// IR-NEXT: [[ADD44:%.*]] = add i32 [[SUB43]], 1 +// IR-NEXT: [[DIV45:%.*]] = udiv i32 [[ADD44]], 1 +// IR-NEXT: [[MUL46:%.*]] = mul i32 1, [[DIV45]] +// IR-NEXT: [[MUL47:%.*]] = mul i32 [[MUL46]], 4 +// IR-NEXT: [[CONV48:%.*]] = zext i32 [[MUL47]] to i64 +// IR-NEXT: [[MUL49:%.*]] = mul nsw i64 [[DIV41]], [[CONV48]] +// IR-NEXT: [[SUB50:%.*]] = sub nsw i64 [[TMP21]], [[MUL49]] +// IR-NEXT: [[DIV51:%.*]] = sdiv i64 [[SUB50]], 4 +// IR-NEXT: [[MUL52:%.*]] = mul nsw i64 [[DIV51]], 1 +// IR-NEXT: [[ADD53:%.*]] = add nsw i64 [[CONV33]], [[MUL52]] +// IR-NEXT: [[CONV54:%.*]] = trunc i64 [[ADD53]] to i32 +// IR-NEXT: store i32 [[CONV54]], i32* [[DOTTILE_0_IV_I14]], align 4 +// IR-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// 
IR-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB55:%.*]] = sub i32 [[TMP29]], [[TMP30]] +// IR-NEXT: [[SUB56:%.*]] = sub i32 [[SUB55]], 1 +// IR-NEXT: [[ADD57:%.*]] = add i32 [[SUB56]], 1 +// IR-NEXT: [[DIV58:%.*]] = udiv i32 [[ADD57]], 1 +// IR-NEXT: [[MUL59:%.*]] = mul i32 1, [[DIV58]] +// IR-NEXT: [[MUL60:%.*]] = mul i32 [[MUL59]], 4 +// IR-NEXT: [[CONV61:%.*]] = zext i32 [[MUL60]] to i64 +// IR-NEXT: [[DIV62:%.*]] = sdiv i64 [[TMP28]], [[CONV61]] +// IR-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB63:%.*]] = sub i32 [[TMP31]], [[TMP32]] +// IR-NEXT: [[SUB64:%.*]] = sub i32 [[SUB63]], 1 +// IR-NEXT: [[ADD65:%.*]] = add i32 [[SUB64]], 1 +// IR-NEXT: [[DIV66:%.*]] = udiv i32 [[ADD65]], 1 +// IR-NEXT: [[MUL67:%.*]] = mul i32 1, [[DIV66]] +// IR-NEXT: [[MUL68:%.*]] = mul i32 [[MUL67]], 4 +// IR-NEXT: [[CONV69:%.*]] = zext i32 [[MUL68]] to i64 +// IR-NEXT: [[MUL70:%.*]] = mul nsw i64 [[DIV62]], [[CONV69]] +// IR-NEXT: [[SUB71:%.*]] = sub nsw i64 [[TMP27]], [[MUL70]] +// IR-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB72:%.*]] = sub i32 [[TMP35]], [[TMP36]] +// IR-NEXT: [[SUB73:%.*]] = sub i32 [[SUB72]], 1 +// IR-NEXT: [[ADD74:%.*]] = add i32 [[SUB73]], 1 +// IR-NEXT: [[DIV75:%.*]] = udiv i32 [[ADD74]], 1 +// IR-NEXT: [[MUL76:%.*]] = mul i32 1, [[DIV75]] +// IR-NEXT: [[MUL77:%.*]] = mul i32 [[MUL76]], 4 +// IR-NEXT: [[CONV78:%.*]] = zext i32 [[MUL77]] to i64 +// IR-NEXT: [[DIV79:%.*]] = sdiv i64 [[TMP34]], [[CONV78]] +// IR-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB80:%.*]] = sub i32 [[TMP37]], [[TMP38]] +// IR-NEXT: [[SUB81:%.*]] = sub i32 [[SUB80]], 1 +// IR-NEXT: [[ADD82:%.*]] = add i32 [[SUB81]], 1 +// IR-NEXT: [[DIV83:%.*]] = udiv i32 [[ADD82]], 1 +// IR-NEXT: [[MUL84:%.*]] = mul i32 1, [[DIV83]] +// IR-NEXT: [[MUL85:%.*]] = mul i32 [[MUL84]], 4 +// IR-NEXT: [[CONV86:%.*]] = zext i32 [[MUL85]] to i64 +// IR-NEXT: [[MUL87:%.*]] = mul nsw i64 [[DIV79]], [[CONV86]] +// IR-NEXT: [[SUB88:%.*]] = sub nsw i64 [[TMP33]], [[MUL87]] +// IR-NEXT: [[DIV89:%.*]] = sdiv i64 [[SUB88]], 4 +// IR-NEXT: [[MUL90:%.*]] = mul nsw i64 [[DIV89]], 4 +// IR-NEXT: [[SUB91:%.*]] = sub nsw i64 [[SUB71]], [[MUL90]] +// IR-NEXT: [[MUL92:%.*]] = mul nsw i64 [[SUB91]], 3 +// IR-NEXT: [[ADD93:%.*]] = add nsw i64 7, [[MUL92]] +// IR-NEXT: [[CONV94:%.*]] = trunc i64 [[ADD93]] to i32 +// IR-NEXT: store i32 [[CONV94]], i32* [[J15]], align 4 +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTTILE_0_IV_I14]], align 4 +// IR-NEXT: [[MUL95:%.*]] = mul nsw i32 [[TMP39]], 3 +// IR-NEXT: [[ADD96:%.*]] = add nsw i32 7, [[MUL95]] +// IR-NEXT: store i32 [[ADD96]], i32* [[I]], align 4 +// IR-NEXT: [[TMP40:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP41:%.*]] = load i32, i32* [[J15]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP40]], i32 [[TMP41]]) +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[ADD97:%.*]] = add nsw i64 [[TMP42]], 1 +// IR-NEXT: store i64 [[ADD97]], i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @0, i32 [[TMP0]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 [[TMP0]]) +// IR-NEXT: ret void +// +extern "C" void foo5() { +#pragma omp for collapse(3) +#pragma omp tile sizes(5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +// IR-LABEL: define void @foo6( +// IR-NEXT: entry: +// IR-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// IR-NEXT: ret void +// +// IR-LABEL: define internal void @.omp_outlined.( +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @0, i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// IR-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 5 +// IR-NEXT: [[CMP4:%.*]] = icmp slt i32 4, [[ADD3]] +// IR-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// IR: cond.true5: +// IR-NEXT: br label [[COND_END8:%.*]] +// IR: cond.false6: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I1]], align 4 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 5 +// IR-NEXT: br label [[COND_END8]] +// IR: cond.end8: +// IR-NEXT: [[COND9:%.*]] = phi i32 [ 4, [[COND_TRUE5]] ], [ [[ADD7]], [[COND_FALSE6]] ] +// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP9]], [[COND9]] +// IR-NEXT: br i1 [[CMP10]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR: for.body: +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 3 +// IR-NEXT: [[ADD12:%.*]] = add nsw i32 7, [[MUL11]] +// IR-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP13]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND]] +// IR: for.end: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @0, i32 [[TMP1]]) +// IR-NEXT: ret void +// +extern "C" void foo6() { +#pragma omp parallel for +#pragma omp tile sizes(5) + for (int i = 7; i < 17; i += 3) + body(i); +} + + +template <typename T, T Step, T Tile> +void foo7(T start, T end) { +#pragma omp tile sizes(Tile) + for (T i = start; i < end; i += Step) + body(i); +} + +// IR-LABEL: define void @tfoo7( +// IR-NEXT: entry: +// IR-NEXT: call void @_Z4foo7IiLi3ELi5EEvT_S0_(i32 0, i32 42) +// IR-NEXT: ret void +// +// IR-LABEL: define linkonce_odr void @_Z4foo7IiLi3ELi5EEvT_S0_( +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], [[TMP2]] +// IR-NEXT: [[SUB1:%.*]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[ADD:%.*]] = add i32 [[SUB1]], 3 +// IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 3 +// IR-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 +// IR-NEXT: [[ADD3:%.*]] = add i32 [[SUB2]], 1 +// IR-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]] +// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END25:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP3]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4:%.*]] +// IR: for.cond4: +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[SUB5:%.*]] = sub i32 [[TMP5]], [[TMP6]] +// IR-NEXT: [[SUB6:%.*]] = sub i32 [[SUB5]], 1 +// IR-NEXT: [[ADD7:%.*]] = add i32 [[SUB6]], 3 +// IR-NEXT: [[DIV8:%.*]] = udiv i32 [[ADD7]], 3 +// IR-NEXT: [[SUB9:%.*]] = sub i32 [[DIV8]], 1 +// IR-NEXT: [[ADD10:%.*]] = add i32 [[SUB9]], 1 +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP7]], 5 +// IR-NEXT: [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]] +// IR-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP8:%.*]]
= load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[SUB13:%.*]] = sub i32 [[TMP8]], [[TMP9]] +// IR-NEXT: [[SUB14:%.*]] = sub i32 [[SUB13]], 1 +// IR-NEXT: [[ADD15:%.*]] = add i32 [[SUB14]], 3 +// IR-NEXT: [[DIV16:%.*]] = udiv i32 [[ADD15]], 3 +// IR-NEXT: [[SUB17:%.*]] = sub i32 [[DIV16]], 1 +// IR-NEXT: [[ADD18:%.*]] = add i32 [[SUB17]], 1 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP10]], 5 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[ADD18]], [[COND_TRUE]] ], [ [[ADD19]], [[COND_FALSE]] ] +// IR-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TMP4]], [[COND]] +// IR-NEXT: br i1 [[CMP20]], label [[FOR_BODY21:%.*]], label [[FOR_END:%.*]] +// IR: for.body21: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP11]], i32* [[I]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 3 +// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP12]], [[MUL]] +// IR-NEXT: store i32 [[ADD22]], i32* [[I]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: call void (...) @body(i32 [[TMP14]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND4]] +// IR: for.end: +// IR-NEXT: br label [[FOR_INC23:%.*]] +// IR: for.inc23: +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND]] +// IR: for.end25: +// IR-NEXT: ret void +// +extern "C" void tfoo7() { + foo7<int, 3, 5>(0, 42); +} + + +#endif /* HEADER */ + diff --git a/clang/test/OpenMP/tile_messages.cpp b/clang/test/OpenMP/tile_messages.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/tile_messages.cpp @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=51 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+1 {{expected '('}} + #pragma omp tile sizes + ; + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes( + ; + + // expected-error@+1 {{expected expression}} + #pragma omp tile sizes() + ; + + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes(5 + for (int i = 0; i < 7; ++i); + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes(5, + ; + + // expected-error@+1 {{expected expression}} + #pragma omp tile sizes(5,) + ; + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes(5+ + ; + + // expected-error@+1 {{expected expression}} + #pragma omp tile sizes(5+) + ; + + // expected-error@+1 {{expected expression}} + #pragma omp tile sizes(for) + ; + + // expected-error@+1 {{argument to 'sizes' clause must be a strictly
positive integer value}} + #pragma omp tile sizes(0) + ; + + // expected-error@+4 {{expression is not an integral constant expression}} + // expected-note@+3 {{read of non-const variable 'a' is not allowed in a constant expression}} + // expected-note@+1 {{declared here}} + int a; + #pragma omp tile sizes(a) + ; + + // expected-warning@+2 {{extra tokens at the end of '#pragma omp tile' are ignored}} + // expected-error@+1 {{directive '#pragma omp tile' requires the 'sizes' clause}} + #pragma omp tile foo + ; + + // expected-error@+1 {{directive '#pragma omp tile' cannot contain more than one 'sizes' clause}} + #pragma omp tile sizes(5) sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+1 {{unexpected OpenMP clause 'collapse' in directive '#pragma omp tile'}} + #pragma omp tile sizes(5) collapse(2) + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp tile sizes(5) + } + + // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5) + int b = 0; + + // expected-error@+3 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) { + int k = 3; + for (int j = 0; j < 7; ++j) + ; + } + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = i; j < 7; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = 0; j < i; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = 0; j < 7; j += i) + ; + + // expected-error@+4 {{expected 3 for loops after '#pragma omp for', but found only 2}} + // expected-note@+1 {{as specified in 'collapse' clause}} + #pragma omp for collapse(3) + #pragma omp tile sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5) + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} + #pragma omp tile sizes(5) + for (int i = 0; i/3<7; ++i) + ; +} + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2033,6 +2033,7 @@ void VisitOMPLoopDirective(const OMPLoopDirective *D); void VisitOMPParallelDirective(const OMPParallelDirective *D); void VisitOMPSimdDirective(const OMPSimdDirective *D); + void VisitOMPTileDirective(const OMPTileDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -2205,6 +2206,11 @@ Visitor->AddStmt(C->getSimdlen()); } +void OMPClauseEnqueue::VisitOMPSizesClause(const OMPSizesClause *C) { + for (auto E : C->getSizesRefs()) + Visitor->AddStmt(E); +} + void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { Visitor->AddStmt(C->getAllocator()); } @@ -2845,6 +2851,10 @@ VisitOMPLoopDirective(D); } +void EnqueueVisitor::VisitOMPTileDirective(const OMPTileDirective *D) { + VisitOMPLoopDirective(D); +} +
void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -5522,6 +5532,8 @@ return cxstring::createRef("OMPParallelDirective"); case CXCursor_OMPSimdDirective: return cxstring::createRef("OMPSimdDirective"); + case CXCursor_OMPTileDirective: + return cxstring::createRef("OMPTileDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -645,6 +645,9 @@ case Stmt::OMPSimdDirectiveClass: K = CXCursor_OMPSimdDirective; break; + case Stmt::OMPTileDirectiveClass: + K = CXCursor_OMPTileDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -39,6 +39,7 @@ } def OMPC_SafeLen : Clause<"safelen"> { let clangClass = "OMPSafelenClause"; } def OMPC_SimdLen : Clause<"simdlen"> { let clangClass = "OMPSimdlenClause"; } +def OMPC_Sizes : Clause<"sizes"> { let clangClass = "OMPSizesClause"; } def OMPC_Collapse : Clause<"collapse"> { let clangClass = "OMPCollapseClause"; } def OMPC_Default : Clause<"default"> { let clangClass = "OMPDefaultClause"; } def OMPC_Private : Clause<"private"> { let clangClass = "OMPPrivateClause"; } @@ -210,6 +211,9 @@ OMPC_Aligned, OMPC_SafeLen, OMPC_SimdLen, OMPC_Collapse, OMPC_Reduction, OMPC_Allocate, OMPC_If, OMPC_NonTemporal, OMPC_Order]; } +def OMP_Tile : Directive<"tile"> { + let allowedClauses = [OMPC_Sizes]; +} def OMP_For : Directive<"for"> { let allowedClauses = [OMPC_Private, OMPC_LastPrivate, OMPC_FirstPrivate, OMPC_Reduction, OMPC_Collapse, OMPC_Schedule, OMPC_Ordered, OMPC_NoWait, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -27,6 +27,7 @@ __OMP_DIRECTIVE(parallel) __OMP_DIRECTIVE(task) __OMP_DIRECTIVE(simd) +__OMP_DIRECTIVE(tile) __OMP_DIRECTIVE(for) __OMP_DIRECTIVE(sections) __OMP_DIRECTIVE(section) @@ -138,6 +139,7 @@ __OMP_CLAUSE(num_threads, OMPNumThreadsClause) __OMP_CLAUSE(safelen, OMPSafelenClause) __OMP_CLAUSE(simdlen, OMPSimdlenClause) +__OMP_CLAUSE(sizes, OMPSizesClause) __OMP_CLAUSE(collapse, OMPCollapseClause) __OMP_CLAUSE(default, OMPDefaultClause) __OMP_CLAUSE(private, OMPPrivateClause) @@ -1188,6 +1190,8 @@ __OMP_DIRECTIVE_CLAUSE(simd, 50, ~0, nontemporal) __OMP_DIRECTIVE_CLAUSE(simd, 50, ~0, order) +__OMP_DIRECTIVE_CLAUSE(tile, 1, ~0, sizes) + __OMP_DIRECTIVE_CLAUSE(for, 1, ~0, private) __OMP_DIRECTIVE_CLAUSE(for, 1, ~0, lastprivate) __OMP_DIRECTIVE_CLAUSE(for, 1, ~0, firstprivate)
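For orientation, the OMP.td and OMPKinds.def entries above are what register the new 'tile' directive and its 'sizes' clause with the common OpenMP frontend tables, so the parser can map the pragma tokens to llvm::omp::OMPD_tile / llvm::omp::OMPC_sizes and restrict 'sizes' to the 'tile' directive. A minimal sketch of the user-level construct these entries enable (hypothetical example code, not part of this patch or its tests):

// Tiles the i/j loop nest into 4x4 blocks; clang rewrites the nest into
// floor/tile loops of the kind checked by the IR in tile_codegen.cpp above.
extern void work(int i, int j);
void tiled(int n) {
#pragma omp tile sizes(4, 4)
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      work(i, j);
}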