diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2568,7 +2568,11 @@ */ CXCursor_OMPScanDirective = 287, - CXCursor_LastStmt = CXCursor_OMPScanDirective, + /** OpenMP tile directive. + */ + CXCursor_OMPTileDirective = 288, + + CXCursor_LastStmt = CXCursor_OMPTileDirective, /** * Cursor that represents the translation unit itself. diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -794,6 +794,100 @@ } }; +/// This represents the 'sizes' clause in the '#pragma omp tile' directive. +/// +/// \code +/// #pragma omp tile sizes(5,5) +/// for (int i = 0; i < 64; ++i) +/// for (int j = 0; j < 64; ++j) +/// \endcode +class OMPSizesClause final + : public OMPClause, + private llvm::TrailingObjects { + friend class OMPClauseReader; + friend class llvm::TrailingObjects; + + /// Location of '('. + SourceLocation LParenLoc; + + /// Number of tile sizes in the clause. + unsigned NumSizes; + + /// Build an empty clause. + explicit OMPSizesClause(int NumSizes) + : OMPClause(llvm::omp::OMPC_sizes, SourceLocation(), SourceLocation()), + NumSizes(NumSizes) {} + +public: + /// Build a 'sizes' AST node. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the 'sizes' identifier. + /// \param LParenLoc Location of '('. + /// \param EndLoc Location of ')'. + /// \param Sizes Content of the clause. + static OMPSizesClause *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc, + ArrayRef Sizes); + + /// Build an empty 'sizes' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param Sizes Number of items in the clause. + static OMPSizesClause *CreateEmpty(const ASTContext &C, unsigned NumSizes); + + /// Sets the location of '('. 
+ void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns the number of list items. + unsigned getNumSizes() const { return NumSizes; } + + /// Returns the tile size expressions. + MutableArrayRef getSizesRefs() { + return MutableArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumSizes); + } + ArrayRef getSizesRefs() const { + return ArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumSizes); + } + + /// Sets the tile size expressions. + void setSizesRefs(ArrayRef VL) { + assert(VL.size() == NumSizes); + std::copy(VL.begin(), VL.end(), + static_cast(this) + ->template getTrailingObjects()); + } + + child_range children() { + MutableArrayRef Sizes = getSizesRefs(); + return child_range(reinterpret_cast(Sizes.begin()), + reinterpret_cast(Sizes.end())); + } + const_child_range children() const { + ArrayRef Sizes = getSizesRefs(); + return const_child_range(reinterpret_cast(Sizes.begin()), + reinterpret_cast(Sizes.end())); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_sizes; + } +}; + /// This represents 'collapse' clause in the '#pragma omp ...' /// directive. 
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2799,6 +2799,9 @@ DEF_TRAVERSE_STMT(OMPSimdDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPTileDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) @@ -3039,6 +3042,13 @@ return true; } +template +bool RecursiveASTVisitor::VisitOMPSizesClause(OMPSizesClause *C) { + for (Expr *E : C->getSizesRefs()) + TRY_TO(TraverseStmt(E)); + return true; +} + template bool RecursiveASTVisitor::VisitOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -440,13 +440,260 @@ } }; +/// The base class for all loop-based directives, including loop transformation +/// directives. +class OMPLoopBasedDirective : public OMPExecutableDirective { + friend class ASTStmtReader; + +protected: + /// Number of collapsed loops as specified by 'collapse' clause. + unsigned NumAssociatedLoops = 0; + + /// Build instance of loop directive of class \a Kind. + /// + /// \param SC Statement class. + /// \param Kind Kind of OpenMP directive. + /// \param StartLoc Starting location of the directive (directive keyword). + /// \param EndLoc Ending location of the directive. + /// \param NumAssociatedLoops Number of loops associated with the construct. + /// + OMPLoopBasedDirective(StmtClass SC, OpenMPDirectiveKind Kind, + SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumAssociatedLoops) + : OMPExecutableDirective(SC, Kind, StartLoc, EndLoc), + NumAssociatedLoops(NumAssociatedLoops) {} + +public: + /// The expressions built to support OpenMP loops in combined/composite + /// pragmas (e.g. 
pragma omp distribute parallel for) + struct DistCombinedHelperExprs { + /// DistributeLowerBound - used when composing 'omp distribute' with + /// 'omp for' in a same construct. + Expr *LB; + /// DistributeUpperBound - used when composing 'omp distribute' with + /// 'omp for' in a same construct. + Expr *UB; + /// DistributeEnsureUpperBound - used when composing 'omp distribute' + /// with 'omp for' in a same construct, EUB depends on DistUB + Expr *EUB; + /// Distribute loop iteration variable init used when composing 'omp + /// distribute' + /// with 'omp for' in a same construct + Expr *Init; + /// Distribute Loop condition used when composing 'omp distribute' + /// with 'omp for' in a same construct + Expr *Cond; + /// Update of LowerBound for statically scheduled omp loops for + /// outer loop in combined constructs (e.g. 'distribute parallel for') + Expr *NLB; + /// Update of UpperBound for statically scheduled omp loops for + /// outer loop in combined constructs (e.g. 'distribute parallel for') + Expr *NUB; + /// Distribute Loop condition used when composing 'omp distribute' + /// with 'omp for' in a same construct when schedule is chunked. + Expr *DistCond; + /// 'omp parallel for' loop condition used when composed with + /// 'omp distribute' in the same construct and when schedule is + /// chunked and the chunk size is 1. + Expr *ParForInDistCond; + }; + + /// The expressions built for the OpenMP loop CodeGen for the + /// whole collapsed loop nest. + struct HelperExprs { + /// Loop iteration variable. + Expr *IterationVarRef; + /// Loop last iteration number. + Expr *LastIteration; + /// Loop number of iterations. + Expr *NumIterations; + /// Calculation of last iteration. + Expr *CalcLastIteration; + /// Loop pre-condition. + Expr *PreCond; + /// Loop condition. + Expr *Cond; + /// Loop iteration variable init. + Expr *Init; + /// Loop increment. + Expr *Inc; + /// IsLastIteration - local flag variable passed to runtime. 
+ Expr *IL; + /// LowerBound - local variable passed to runtime. + Expr *LB; + /// UpperBound - local variable passed to runtime. + Expr *UB; + /// Stride - local variable passed to runtime. + Expr *ST; + /// EnsureUpperBound -- expression UB = min(UB, NumIterations). + Expr *EUB; + /// Update of LowerBound for statically scheduled 'omp for' loops. + Expr *NLB; + /// Update of UpperBound for statically scheduled 'omp for' loops. + Expr *NUB; + /// PreviousLowerBound - local variable passed to runtime in the + /// enclosing schedule or null if that does not apply. + Expr *PrevLB; + /// PreviousUpperBound - local variable passed to runtime in the + /// enclosing schedule or null if that does not apply. + Expr *PrevUB; + /// DistInc - increment expression for distribute loop when found + /// combined with a further loop level (e.g. in 'distribute parallel for') + /// expression IV = IV + ST + Expr *DistInc; + /// PrevEUB - expression similar to EUB but to be used when loop + /// scheduling uses PrevLB and PrevUB (e.g. in 'distribute parallel for' + /// when ensuring that the UB is either the calculated UB by the runtime or + /// the end of the assigned distribute chunk) + /// expression UB = min (UB, PrevUB) + Expr *PrevEUB; + /// Counters Loop counters. + SmallVector Counters; + /// PrivateCounters Loop counters. + SmallVector PrivateCounters; + /// Expressions for loop counters inits for CodeGen. + SmallVector Inits; + /// Expressions for loop counters update for CodeGen. + SmallVector Updates; + /// Final loop counter values for GodeGen. + SmallVector Finals; + /// List of counters required for the generation of the non-rectangular + /// loops. + SmallVector DependentCounters; + /// List of initializers required for the generation of the non-rectangular + /// loops. + SmallVector DependentInits; + /// List of final conditions required for the generation of the + /// non-rectangular loops. 
+ SmallVector FinalsConditions; + /// Init statement for all captured expressions. + Stmt *PreInits; + + /// Expressions used when combining OpenMP loop pragmas + DistCombinedHelperExprs DistCombinedFields; + + /// Check if all the expressions are built (does not check the + /// worksharing ones). + bool builtAll() { + return IterationVarRef != nullptr && LastIteration != nullptr && + NumIterations != nullptr && PreCond != nullptr && + Cond != nullptr && Init != nullptr && Inc != nullptr; + } + + /// Initialize all the fields to null. + /// \param Size Number of elements in the + /// counters/finals/updates/dependent_counters/dependent_inits/finals_conditions + /// arrays. + void clear(unsigned Size) { + IterationVarRef = nullptr; + LastIteration = nullptr; + CalcLastIteration = nullptr; + PreCond = nullptr; + Cond = nullptr; + Init = nullptr; + Inc = nullptr; + IL = nullptr; + LB = nullptr; + UB = nullptr; + ST = nullptr; + EUB = nullptr; + NLB = nullptr; + NUB = nullptr; + NumIterations = nullptr; + PrevLB = nullptr; + PrevUB = nullptr; + DistInc = nullptr; + PrevEUB = nullptr; + Counters.resize(Size); + PrivateCounters.resize(Size); + Inits.resize(Size); + Updates.resize(Size); + Finals.resize(Size); + DependentCounters.resize(Size); + DependentInits.resize(Size); + FinalsConditions.resize(Size); + for (unsigned I = 0; I < Size; ++I) { + Counters[I] = nullptr; + PrivateCounters[I] = nullptr; + Inits[I] = nullptr; + Updates[I] = nullptr; + Finals[I] = nullptr; + DependentCounters[I] = nullptr; + DependentInits[I] = nullptr; + FinalsConditions[I] = nullptr; + } + PreInits = nullptr; + DistCombinedFields.LB = nullptr; + DistCombinedFields.UB = nullptr; + DistCombinedFields.EUB = nullptr; + DistCombinedFields.Init = nullptr; + DistCombinedFields.Cond = nullptr; + DistCombinedFields.NLB = nullptr; + DistCombinedFields.NUB = nullptr; + DistCombinedFields.DistCond = nullptr; + DistCombinedFields.ParForInDistCond = nullptr; + } + }; + + /// Get number of collapsed 
loops. + unsigned getLoopsNumber() const { return NumAssociatedLoops; } + + /// Try to find the next loop sub-statement in the specified statement \p + /// CurStmt. + /// \param TryImperfectlyNestedLoops true, if we need to try to look for the + /// imperfectly nested loop. + static Stmt *tryToFindNextInnerLoop(Stmt *CurStmt, + bool TryImperfectlyNestedLoops); + static const Stmt *tryToFindNextInnerLoop(const Stmt *CurStmt, + bool TryImperfectlyNestedLoops) { + return tryToFindNextInnerLoop(const_cast(CurStmt), + TryImperfectlyNestedLoops); + } + + /// Calls the specified callback function for all the loops in \p CurStmt, + /// from the outermost to the innermost. + static bool + doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops, + unsigned NumLoops, + llvm::function_ref Callback); + static bool + doForAllLoops(const Stmt *CurStmt, bool TryImperfectlyNestedLoops, + unsigned NumLoops, + llvm::function_ref Callback) { + auto &&NewCallback = [Callback](unsigned Cnt, Stmt *CurStmt) { + return Callback(Cnt, CurStmt); + }; + return doForAllLoops(const_cast(CurStmt), TryImperfectlyNestedLoops, + NumLoops, NewCallback); + } + + /// Calls the specified callback function for all the loop bodies in \p + /// CurStmt, from the outermost loop to the innermost. 
+ static void doForAllLoopsBodies( + Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, + llvm::function_ref Callback); + static void doForAllLoopsBodies( + const Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, + llvm::function_ref Callback) { + auto &&NewCallback = [Callback](unsigned Cnt, Stmt *Loop, Stmt *Body) { + Callback(Cnt, Loop, Body); + }; + doForAllLoopsBodies(const_cast(CurStmt), TryImperfectlyNestedLoops, + NumLoops, NewCallback); + } + + static bool classof(const Stmt *T) { + if (auto *D = dyn_cast(T)) + return isOpenMPLoopDirective(D->getDirectiveKind()); + return false; + } +}; + /// This is a common base class for loop directives ('omp simd', 'omp /// for', 'omp for simd' etc.). It is responsible for the loop code generation. /// -class OMPLoopDirective : public OMPExecutableDirective { +class OMPLoopDirective : public OMPLoopBasedDirective { friend class ASTStmtReader; - /// Number of collapsed loops as specified by 'collapse' clause. - unsigned CollapsedNum = 0; /// Offsets to the stored exprs. /// This enumeration contains offsets to all the pointers to children @@ -454,7 +701,7 @@ /// The first 9 children are necessary for all the loop directives, /// the next 8 are specific to the worksharing ones, and the next 11 are /// used for combined constructs containing two pragmas associated to loops. - /// After the fixed children, three arrays of length CollapsedNum are + /// After the fixed children, three arrays of length NumAssociatedLoops are /// allocated: loop counters, their updates and final values. 
/// PrevLowerBound and PrevUpperBound are used to communicate blocking /// information in composite constructs which require loop blocking @@ -512,63 +759,63 @@ MutableArrayRef getCounters() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind())]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } /// Get the private counters storage. MutableArrayRef getPrivateCounters() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind()) + - CollapsedNum]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + getLoopsNumber()]); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } /// Get the updates storage. MutableArrayRef getInits() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind()) + - 2 * CollapsedNum]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + 2 * getLoopsNumber()]); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } /// Get the updates storage. MutableArrayRef getUpdates() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind()) + - 3 * CollapsedNum]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + 3 * getLoopsNumber()]); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } /// Get the final counter updates storage. MutableArrayRef getFinals() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind()) + - 4 * CollapsedNum]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + 4 * getLoopsNumber()]); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } /// Get the dependent counters storage. 
MutableArrayRef getDependentCounters() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind()) + - 5 * CollapsedNum]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + 5 * getLoopsNumber()]); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } /// Get the dependent inits storage. MutableArrayRef getDependentInits() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind()) + - 6 * CollapsedNum]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + 6 * getLoopsNumber()]); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } /// Get the finals conditions storage. MutableArrayRef getFinalsConditions() { auto **Storage = reinterpret_cast( &Data->getChildren()[getArraysOffset(getDirectiveKind()) + - 7 * CollapsedNum]); - return llvm::makeMutableArrayRef(Storage, CollapsedNum); + 7 * getLoopsNumber()]); + return llvm::makeMutableArrayRef(Storage, getLoopsNumber()); } protected: @@ -583,8 +830,7 @@ OMPLoopDirective(StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum) - : OMPExecutableDirective(SC, Kind, StartLoc, EndLoc), - CollapsedNum(CollapsedNum) {} + : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, CollapsedNum) {} /// Offset to the start of children expression arrays. static unsigned getArraysOffset(OpenMPDirectiveKind Kind) { @@ -753,180 +999,6 @@ void setFinalsConditions(ArrayRef A); public: - /// The expressions built to support OpenMP loops in combined/composite - /// pragmas (e.g. pragma omp distribute parallel for) - struct DistCombinedHelperExprs { - /// DistributeLowerBound - used when composing 'omp distribute' with - /// 'omp for' in a same construct. - Expr *LB; - /// DistributeUpperBound - used when composing 'omp distribute' with - /// 'omp for' in a same construct. 
- Expr *UB; - /// DistributeEnsureUpperBound - used when composing 'omp distribute' - /// with 'omp for' in a same construct, EUB depends on DistUB - Expr *EUB; - /// Distribute loop iteration variable init used when composing 'omp - /// distribute' - /// with 'omp for' in a same construct - Expr *Init; - /// Distribute Loop condition used when composing 'omp distribute' - /// with 'omp for' in a same construct - Expr *Cond; - /// Update of LowerBound for statically scheduled omp loops for - /// outer loop in combined constructs (e.g. 'distribute parallel for') - Expr *NLB; - /// Update of UpperBound for statically scheduled omp loops for - /// outer loop in combined constructs (e.g. 'distribute parallel for') - Expr *NUB; - /// Distribute Loop condition used when composing 'omp distribute' - /// with 'omp for' in a same construct when schedule is chunked. - Expr *DistCond; - /// 'omp parallel for' loop condition used when composed with - /// 'omp distribute' in the same construct and when schedule is - /// chunked and the chunk size is 1. - Expr *ParForInDistCond; - }; - - /// The expressions built for the OpenMP loop CodeGen for the - /// whole collapsed loop nest. - struct HelperExprs { - /// Loop iteration variable. - Expr *IterationVarRef; - /// Loop last iteration number. - Expr *LastIteration; - /// Loop number of iterations. - Expr *NumIterations; - /// Calculation of last iteration. - Expr *CalcLastIteration; - /// Loop pre-condition. - Expr *PreCond; - /// Loop condition. - Expr *Cond; - /// Loop iteration variable init. - Expr *Init; - /// Loop increment. - Expr *Inc; - /// IsLastIteration - local flag variable passed to runtime. - Expr *IL; - /// LowerBound - local variable passed to runtime. - Expr *LB; - /// UpperBound - local variable passed to runtime. - Expr *UB; - /// Stride - local variable passed to runtime. - Expr *ST; - /// EnsureUpperBound -- expression UB = min(UB, NumIterations). 
- Expr *EUB; - /// Update of LowerBound for statically scheduled 'omp for' loops. - Expr *NLB; - /// Update of UpperBound for statically scheduled 'omp for' loops. - Expr *NUB; - /// PreviousLowerBound - local variable passed to runtime in the - /// enclosing schedule or null if that does not apply. - Expr *PrevLB; - /// PreviousUpperBound - local variable passed to runtime in the - /// enclosing schedule or null if that does not apply. - Expr *PrevUB; - /// DistInc - increment expression for distribute loop when found - /// combined with a further loop level (e.g. in 'distribute parallel for') - /// expression IV = IV + ST - Expr *DistInc; - /// PrevEUB - expression similar to EUB but to be used when loop - /// scheduling uses PrevLB and PrevUB (e.g. in 'distribute parallel for' - /// when ensuring that the UB is either the calculated UB by the runtime or - /// the end of the assigned distribute chunk) - /// expression UB = min (UB, PrevUB) - Expr *PrevEUB; - /// Counters Loop counters. - SmallVector Counters; - /// PrivateCounters Loop counters. - SmallVector PrivateCounters; - /// Expressions for loop counters inits for CodeGen. - SmallVector Inits; - /// Expressions for loop counters update for CodeGen. - SmallVector Updates; - /// Final loop counter values for GodeGen. - SmallVector Finals; - /// List of counters required for the generation of the non-rectangular - /// loops. - SmallVector DependentCounters; - /// List of initializers required for the generation of the non-rectangular - /// loops. - SmallVector DependentInits; - /// List of final conditions required for the generation of the - /// non-rectangular loops. - SmallVector FinalsConditions; - /// Init statement for all captured expressions. - Stmt *PreInits; - - /// Expressions used when combining OpenMP loop pragmas - DistCombinedHelperExprs DistCombinedFields; - - /// Check if all the expressions are built (does not check the - /// worksharing ones). 
- bool builtAll() { - return IterationVarRef != nullptr && LastIteration != nullptr && - NumIterations != nullptr && PreCond != nullptr && - Cond != nullptr && Init != nullptr && Inc != nullptr; - } - - /// Initialize all the fields to null. - /// \param Size Number of elements in the - /// counters/finals/updates/dependent_counters/dependent_inits/finals_conditions - /// arrays. - void clear(unsigned Size) { - IterationVarRef = nullptr; - LastIteration = nullptr; - CalcLastIteration = nullptr; - PreCond = nullptr; - Cond = nullptr; - Init = nullptr; - Inc = nullptr; - IL = nullptr; - LB = nullptr; - UB = nullptr; - ST = nullptr; - EUB = nullptr; - NLB = nullptr; - NUB = nullptr; - NumIterations = nullptr; - PrevLB = nullptr; - PrevUB = nullptr; - DistInc = nullptr; - PrevEUB = nullptr; - Counters.resize(Size); - PrivateCounters.resize(Size); - Inits.resize(Size); - Updates.resize(Size); - Finals.resize(Size); - DependentCounters.resize(Size); - DependentInits.resize(Size); - FinalsConditions.resize(Size); - for (unsigned i = 0; i < Size; ++i) { - Counters[i] = nullptr; - PrivateCounters[i] = nullptr; - Inits[i] = nullptr; - Updates[i] = nullptr; - Finals[i] = nullptr; - DependentCounters[i] = nullptr; - DependentInits[i] = nullptr; - FinalsConditions[i] = nullptr; - } - PreInits = nullptr; - DistCombinedFields.LB = nullptr; - DistCombinedFields.UB = nullptr; - DistCombinedFields.EUB = nullptr; - DistCombinedFields.Init = nullptr; - DistCombinedFields.Cond = nullptr; - DistCombinedFields.NLB = nullptr; - DistCombinedFields.NUB = nullptr; - DistCombinedFields.DistCond = nullptr; - DistCombinedFields.ParForInDistCond = nullptr; - } - }; - - /// Get number of collapsed loops. 
- unsigned getCollapsedNumber() const { return CollapsedNum; } - Expr *getIterationVariable() const { return cast(Data->getChildren()[IterationVariableOffset]); } @@ -1067,17 +1139,6 @@ "expected loop bound distribute sharing directive"); return cast(Data->getChildren()[CombinedParForInDistConditionOffset]); } - /// Try to find the next loop sub-statement in the specified statement \p - /// CurStmt. - /// \param TryImperfectlyNestedLoops true, if we need to try to look for the - /// imperfectly nested loop. - static Stmt *tryToFindNextInnerLoop(Stmt *CurStmt, - bool TryImperfectlyNestedLoops); - static const Stmt *tryToFindNextInnerLoop(const Stmt *CurStmt, - bool TryImperfectlyNestedLoops) { - return tryToFindNextInnerLoop(const_cast(CurStmt), - TryImperfectlyNestedLoops); - } Stmt *getBody(); const Stmt *getBody() const { return const_cast(this)->getBody(); @@ -1263,7 +1324,7 @@ /// Sets special task reduction descriptor. void setTaskReductionRefExpr(Expr *E) { - Data->getChildren()[numLoopChildren(getCollapsedNumber(), + Data->getChildren()[numLoopChildren(getLoopsNumber(), llvm::omp::OMPD_for)] = E; } @@ -1303,7 +1364,7 @@ /// Returns special task reduction reference expression. Expr *getTaskReductionRefExpr() { return cast_or_null(Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_for)]); + getLoopsNumber(), llvm::omp::OMPD_for)]); } const Expr *getTaskReductionRefExpr() const { return const_cast(this)->getTaskReductionRefExpr(); @@ -1728,7 +1789,7 @@ /// Sets special task reduction descriptor. void setTaskReductionRefExpr(Expr *E) { - Data->getChildren()[numLoopChildren(getCollapsedNumber(), + Data->getChildren()[numLoopChildren(getLoopsNumber(), llvm::omp::OMPD_parallel_for)] = E; } @@ -1770,7 +1831,7 @@ /// Returns special task reduction reference expression. 
Expr *getTaskReductionRefExpr() { return cast_or_null(Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_parallel_for)]); + getLoopsNumber(), llvm::omp::OMPD_parallel_for)]); } const Expr *getTaskReductionRefExpr() const { return const_cast(this) @@ -2884,7 +2945,7 @@ /// Sets special task reduction descriptor. void setTaskReductionRefExpr(Expr *E) { Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_target_parallel_for)] = E; + getLoopsNumber(), llvm::omp::OMPD_target_parallel_for)] = E; } /// Set cancel state. @@ -2925,7 +2986,7 @@ /// Returns special task reduction reference expression. Expr *getTaskReductionRefExpr() { return cast_or_null(Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_target_parallel_for)]); + getLoopsNumber(), llvm::omp::OMPD_target_parallel_for)]); } const Expr *getTaskReductionRefExpr() const { return const_cast(this) @@ -3696,7 +3757,7 @@ /// Sets special task reduction descriptor. void setTaskReductionRefExpr(Expr *E) { Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_distribute_parallel_for)] = E; + getLoopsNumber(), llvm::omp::OMPD_distribute_parallel_for)] = E; } /// Set cancel state. @@ -3737,7 +3798,7 @@ /// Returns special task reduction reference expression. Expr *getTaskReductionRefExpr() { return cast_or_null(Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_distribute_parallel_for)]); + getLoopsNumber(), llvm::omp::OMPD_distribute_parallel_for)]); } const Expr *getTaskReductionRefExpr() const { return const_cast(this) @@ -4255,8 +4316,7 @@ /// Sets special task reduction descriptor. void setTaskReductionRefExpr(Expr *E) { Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_teams_distribute_parallel_for)] = - E; + getLoopsNumber(), llvm::omp::OMPD_teams_distribute_parallel_for)] = E; } /// Set cancel state. 
@@ -4295,7 +4355,7 @@ /// Returns special task reduction reference expression. Expr *getTaskReductionRefExpr() { return cast_or_null(Data->getChildren()[numLoopChildren( - getCollapsedNumber(), llvm::omp::OMPD_teams_distribute_parallel_for)]); + getLoopsNumber(), llvm::omp::OMPD_teams_distribute_parallel_for)]); } const Expr *getTaskReductionRefExpr() const { return const_cast(this) @@ -4472,7 +4532,7 @@ /// Sets special task reduction descriptor. void setTaskReductionRefExpr(Expr *E) { Data->getChildren()[numLoopChildren( - getCollapsedNumber(), + getLoopsNumber(), llvm::omp::OMPD_target_teams_distribute_parallel_for)] = E; } @@ -4512,7 +4572,7 @@ /// Returns special task reduction reference expression. Expr *getTaskReductionRefExpr() { return cast_or_null(Data->getChildren()[numLoopChildren( - getCollapsedNumber(), + getLoopsNumber(), llvm::omp::OMPD_target_teams_distribute_parallel_for)]); } const Expr *getTaskReductionRefExpr() const { @@ -4666,6 +4726,82 @@ } }; +/// This represents the '#pragma omp tile' loop transformation directive. +class OMPTileDirective final : public OMPLoopBasedDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + /// Default list of offsets. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPTileDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) + : OMPLoopBasedDirective(OMPTileDirectiveClass, llvm::omp::OMPD_tile, + StartLoc, EndLoc, NumLoops) {} + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for '#pragma omp tile'. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the introducer (e.g. the 'omp' token). + /// \param EndLoc Location of the directive's end (e.g. the tok::eod). + /// \param Clauses The directive's clauses. 
+ /// \param NumLoops Number of associated loops (number of items in the + /// 'sizes' clause). + /// \param AssociatedStmt The outermost associated loop. + /// \param TransformedStmt The loop nest after tiling, or nullptr in + /// dependent contexts. + /// \param PreInits Helper preinits statements for the loop nest. + static OMPTileDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses, + unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits); + + /// Build an empty '#pragma omp tile' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param NumClauses Number of clauses to allocate. + /// \param NumLoops Number of associated loops to allocate. + static OMPTileDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, + unsigned NumLoops); + + unsigned getNumAssociatedLoops() const { return getLoopsNumber(); } + + /// Gets/sets the associated loops after tiling. + /// + /// This is in de-sugared format stored as a CompoundStmt. + /// + /// \code + /// for (...) + /// ... + /// \endcode + /// + /// Note that if the generated loops a become associated loops of another + /// directive, they may need to be hoisted before them. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPTileDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. 
/// /// \code diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -336,6 +336,8 @@ // OpenMP def err_omp_more_one_clause : Error< "directive '#pragma omp %0' cannot contain more than one '%1' clause%select{| with '%3' name modifier| with 'source' dependence}2">; +def err_omp_required_clause : Error< + "directive '#pragma omp %0' requires the '%1' clause">; // Static Analyzer Core def err_unknown_analyzer_checker_or_package : Error< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10479,6 +10479,8 @@ "the referenced item is not found in any private clause on the same directive">; def err_omp_stmt_depends_on_loop_counter : Error< "the loop %select{initializer|condition}0 expression depends on the current loop control variable">; +def err_omp_invariant_dependency : Error< + "expected loop invariant expression">; def err_omp_invariant_or_linear_dependency : Error< "expected loop invariant expression or ' * %0 + ' kind of expression">; def err_omp_wrong_dependency_iterator_type : Error< diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -267,6 +267,11 @@ /// functions bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind); +/// Checks if the specified directive is a loop transformation directive. +/// \param DKind Specified directive. +/// \return True iff the directive is a loop transformation. +bool isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind); + /// Return the captured regions of an OpenMP directive. 
void getOpenMPCaptureRegions( llvm::SmallVectorImpl &CaptureRegions, diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -217,9 +217,11 @@ // OpenMP Directives. def OMPExecutableDirective : StmtNode; -def OMPLoopDirective : StmtNode; +def OMPLoopBasedDirective : StmtNode; +def OMPLoopDirective : StmtNode; def OMPParallelDirective : StmtNode; def OMPSimdDirective : StmtNode; +def OMPTileDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3262,6 +3262,10 @@ OMPClause *ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind, OpenMPClauseKind Kind, bool ParseOnly); + + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. + OMPClause *ParseOpenMPSizesClause(); + /// Parses clause without any additional arguments. /// /// \param Kind Kind of current clause. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10507,6 +10507,11 @@ ActOnOpenMPSimdDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '#pragma omp tile' after parsing of its clauses and + /// the associated statement. + StmtResult ActOnOpenMPTileDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. 
StmtResult @@ -10843,6 +10848,11 @@ OMPClause *ActOnOpenMPSimdlenClause(Expr *Length, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'sizes' clause. + OMPClause *ActOnOpenMPSizesClause(ArrayRef SizeExprs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'collapse' clause. OMPClause *ActOnOpenMPCollapseClause(Expr *NumForLoops, SourceLocation StartLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1889,6 +1889,7 @@ // OpenMP directives STMT_OMP_PARALLEL_DIRECTIVE, STMT_OMP_SIMD_DIRECTIVE, + STMT_OMP_TILE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -100,6 +100,7 @@ case OMPC_proc_bind: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -188,6 +189,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -901,6 +903,25 @@ return new (Mem) OMPInReductionClause(N); } +OMPSizesClause *OMPSizesClause::Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, + ArrayRef Sizes) { + OMPSizesClause *Clause = CreateEmpty(C, Sizes.size()); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setLocEnd(EndLoc); + Clause->setSizesRefs(Sizes); + return Clause; +} + +OMPSizesClause *OMPSizesClause::CreateEmpty(const ASTContext &C, + unsigned NumSizes) { + void *Mem = C.Allocate(totalSizeToAlloc(NumSizes)); + return new (Mem) OMPSizesClause(NumSizes); +} + 
OMPAllocateClause * OMPAllocateClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, Expr *Allocator, @@ -1528,6 +1549,18 @@ OS << ")"; } +void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) { + OS << "sizes("; + bool First = true; + for (auto Size : Node->getSizesRefs()) { + if (!First) + OS << ", "; + Size->printPretty(OS, nullptr, Policy, 0); + First = false; + } + OS << ")"; +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -74,8 +74,9 @@ return getRawStmt(); } -Stmt *OMPLoopDirective::tryToFindNextInnerLoop(Stmt *CurStmt, - bool TryImperfectlyNestedLoops) { +Stmt * +OMPLoopBasedDirective::tryToFindNextInnerLoop(Stmt *CurStmt, + bool TryImperfectlyNestedLoops) { Stmt *OrigStmt = CurStmt; CurStmt = CurStmt->IgnoreContainers(); // Additional work for imperfectly nested loops, introduced in OpenMP 5.0. @@ -91,7 +92,8 @@ for (Stmt *S : CS->body()) { if (!S) continue; - if (isa(S) || isa(S)) { + if (isa(S) || isa(S) || + (isa(S) && !isa(S))) { // Only single loop construct is allowed. if (CurStmt) { CurStmt = OrigStmt; @@ -118,75 +120,110 @@ return CurStmt; } -Stmt *OMPLoopDirective::getBody() { - // This relies on the loop form is already checked by Sema. 
- Stmt *Body = Data->getRawStmt()->IgnoreContainers(); - if (auto *For = dyn_cast(Body)) { - Body = For->getBody(); - } else { - assert(isa(Body) && - "Expected canonical for loop or range-based for loop."); - Body = cast(Body)->getBody(); - } - for (unsigned Cnt = 1; Cnt < CollapsedNum; ++Cnt) { - Body = tryToFindNextInnerLoop(Body, /*TryImperfectlyNestedLoops=*/true); - if (auto *For = dyn_cast(Body)) { - Body = For->getBody(); +bool OMPLoopBasedDirective::doForAllLoops( + Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, + llvm::function_ref Callback) { + CurStmt = CurStmt->IgnoreContainers(); + for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) { + if (auto *Dir = dyn_cast(CurStmt)) + CurStmt = Dir->getTransformedStmt(); + if (Callback(Cnt, CurStmt)) + return false; + // Move on to the next nested for loop, or to the loop body. + // OpenMP [2.8.1, simd construct, Restrictions] + // All loops associated with the construct must be perfectly nested; that + // is, there must be no intervening code nor any OpenMP directive between + // any two loops. 
+ if (auto *For = dyn_cast(CurStmt)) { + CurStmt = For->getBody(); } else { - assert(isa(Body) && - "Expected canonical for loop or range-based for loop."); - Body = cast(Body)->getBody(); + assert(isa(CurStmt) && + "Expected canonical for or range-based for loops."); + CurStmt = cast(CurStmt)->getBody(); } + CurStmt = OMPLoopBasedDirective::tryToFindNextInnerLoop( + CurStmt, TryImperfectlyNestedLoops); } + return true; +} + +void OMPLoopBasedDirective::doForAllLoopsBodies( + Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, + llvm::function_ref Callback) { + bool Res = OMPLoopBasedDirective::doForAllLoops( + CurStmt, TryImperfectlyNestedLoops, NumLoops, + [Callback](unsigned Cnt, Stmt *Loop) { + Stmt *Body = nullptr; + if (auto *For = dyn_cast(Loop)) { + Body = For->getBody(); + } else { + assert(isa(Loop) && + "Expected canonical for or range-based for loops."); + Body = cast(Loop)->getBody(); + } + Callback(Cnt, Loop, Body); + return false; + }); + assert(Res && "Expected only loops"); + (void)Res; +} + +Stmt *OMPLoopDirective::getBody() { + // This relies on the loop form is already checked by Sema. 
+ Stmt *Body = nullptr; + OMPLoopBasedDirective::doForAllLoopsBodies( + Data->getRawStmt(), /*TryImperfectlyNestedLoops=*/true, + NumAssociatedLoops, + [&Body](unsigned, Stmt *, Stmt *BodyStmt) { Body = BodyStmt; }); return Body; } void OMPLoopDirective::setCounters(ArrayRef A) { - assert(A.size() == getCollapsedNumber() && + assert(A.size() == getLoopsNumber() && "Number of loop counters is not the same as the collapsed number"); llvm::copy(A, getCounters().begin()); } void OMPLoopDirective::setPrivateCounters(ArrayRef A) { - assert(A.size() == getCollapsedNumber() && "Number of loop private counters " - "is not the same as the collapsed " - "number"); + assert(A.size() == getLoopsNumber() && "Number of loop private counters " + "is not the same as the collapsed " + "number"); llvm::copy(A, getPrivateCounters().begin()); } void OMPLoopDirective::setInits(ArrayRef A) { - assert(A.size() == getCollapsedNumber() && + assert(A.size() == getLoopsNumber() && "Number of counter inits is not the same as the collapsed number"); llvm::copy(A, getInits().begin()); } void OMPLoopDirective::setUpdates(ArrayRef A) { - assert(A.size() == getCollapsedNumber() && + assert(A.size() == getLoopsNumber() && "Number of counter updates is not the same as the collapsed number"); llvm::copy(A, getUpdates().begin()); } void OMPLoopDirective::setFinals(ArrayRef A) { - assert(A.size() == getCollapsedNumber() && + assert(A.size() == getLoopsNumber() && "Number of counter finals is not the same as the collapsed number"); llvm::copy(A, getFinals().begin()); } void OMPLoopDirective::setDependentCounters(ArrayRef A) { assert( - A.size() == getCollapsedNumber() && + A.size() == getLoopsNumber() && "Number of dependent counters is not the same as the collapsed number"); llvm::copy(A, getDependentCounters().begin()); } void OMPLoopDirective::setDependentInits(ArrayRef A) { - assert(A.size() == getCollapsedNumber() && + assert(A.size() == getLoopsNumber() && "Number of dependent inits is not the same 
as the collapsed number"); llvm::copy(A, getDependentInits().begin()); } void OMPLoopDirective::setFinalsConditions(ArrayRef A) { - assert(A.size() == getCollapsedNumber() && + assert(A.size() == getLoopsNumber() && "Number of finals conditions is not the same as the collapsed number"); llvm::copy(A, getFinalsConditions().begin()); } @@ -291,6 +328,27 @@ numLoopChildren(CollapsedNum, OMPD_for) + 1, CollapsedNum); } +OMPTileDirective * +OMPTileDirective::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, ArrayRef Clauses, + unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits) { + OMPTileDirective *Dir = createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + return Dir; +} + +OMPTileDirective *OMPTileDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -660,6 +660,11 @@ PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPTileDirective(OMPTileDirective *Node) { + Indent() << "#pragma omp tile"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -462,6 +462,12 @@ Profiler->VisitStmt(C->getSimdlen()); } +void 
OMPClauseProfiler::VisitOMPSizesClause(const OMPSizesClause *C) { + for (auto E : C->getSizesRefs()) + if (E) + Profiler->VisitExpr(E); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if (C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); @@ -848,10 +854,14 @@ P.Visit(*I); } -void StmtProfiler::VisitOMPLoopDirective(const OMPLoopDirective *S) { +void StmtProfiler::VisitOMPLoopBasedDirective(const OMPLoopBasedDirective *S) { VisitOMPExecutableDirective(S); } +void StmtProfiler::VisitOMPLoopDirective(const OMPLoopDirective *S) { + VisitOMPLoopBasedDirective(S); +} + void StmtProfiler::VisitOMPParallelDirective(const OMPParallelDirective *S) { VisitOMPExecutableDirective(S); } @@ -860,6 +870,10 @@ VisitOMPLoopDirective(S); } +void StmtProfiler::VisitOMPTileDirective(const OMPTileDirective *S) { + VisitOMPLoopBasedDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -130,6 +130,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -370,6 +371,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -446,7 +448,7 @@ DKind == OMPD_target_teams_distribute || DKind == OMPD_target_teams_distribute_parallel_for || DKind == OMPD_target_teams_distribute_parallel_for_simd || - DKind == OMPD_target_teams_distribute_simd; + DKind == OMPD_target_teams_distribute_simd || DKind == OMPD_tile; } bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { @@ -573,6 +575,10 @@ Kind == OMPD_target_teams_distribute_parallel_for_simd; } +bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { + return DKind == 
OMPD_tile; +} + void clang::getOpenMPCaptureRegions( SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind) { @@ -656,6 +662,9 @@ case OMPD_distribute_simd: CaptureRegions.push_back(OMPD_unknown); break; + case OMPD_tile: + // loop transformations do not introduce captures. + break; case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6631,6 +6631,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -6948,6 +6949,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -9455,6 +9457,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -10289,6 +10292,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -10971,6 +10975,7 @@ case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -200,6 +200,9 @@ case Stmt::OMPSimdDirectiveClass: EmitOMPSimdDirective(cast(*S)); break; + case Stmt::OMPTileDirectiveClass: + EmitOMPTileDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -133,50 +133,53 @@ /// Private scope for OpenMP loop-based directives, that supports capturing /// of used expression from loop statement. 
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { - void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) { + const DeclStmt *PreInits; CodeGenFunction::OMPMapVars PreCondVars; - llvm::DenseSet EmittedAsPrivate; - for (const auto *E : S.counters()) { - const auto *VD = cast(cast(E)->getDecl()); - EmittedAsPrivate.insert(VD->getCanonicalDecl()); - (void)PreCondVars.setVarAddr( - CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); - } - // Mark private vars as undefs. - for (const auto *C : S.getClausesOfKind()) { - for (const Expr *IRef : C->varlists()) { - const auto *OrigVD = cast(cast(IRef)->getDecl()); - if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - (void)PreCondVars.setVarAddr( - CGF, OrigVD, - Address(llvm::UndefValue::get( - CGF.ConvertTypeForMem(CGF.getContext().getPointerType( - OrigVD->getType().getNonReferenceType()))), - CGF.getContext().getDeclAlign(OrigVD))); - } + if (auto *LD = dyn_cast(&S)) { + llvm::DenseSet EmittedAsPrivate; + for (const auto *E : LD->counters()) { + const auto *VD = cast(cast(E)->getDecl()); + EmittedAsPrivate.insert(VD->getCanonicalDecl()); + (void)PreCondVars.setVarAddr( + CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); } - } - (void)PreCondVars.apply(CGF); - // Emit init, __range and __end variables for C++ range loops. 
- const Stmt *Body = - S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); - for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { - Body = OMPLoopDirective::tryToFindNextInnerLoop( - Body, /*TryImperfectlyNestedLoops=*/true); - if (auto *For = dyn_cast(Body)) { - Body = For->getBody(); - } else { - assert(isa(Body) && - "Expected canonical for loop or range-based for loop."); - auto *CXXFor = cast(Body); - if (const Stmt *Init = CXXFor->getInit()) - CGF.EmitStmt(Init); - CGF.EmitStmt(CXXFor->getRangeStmt()); - CGF.EmitStmt(CXXFor->getEndStmt()); - Body = CXXFor->getBody(); + // Mark private vars as undefs. + for (const auto *C : LD->getClausesOfKind()) { + for (const Expr *IRef : C->varlists()) { + const auto *OrigVD = + cast(cast(IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + (void)PreCondVars.setVarAddr( + CGF, OrigVD, + Address(llvm::UndefValue::get(CGF.ConvertTypeForMem( + CGF.getContext().getPointerType( + OrigVD->getType().getNonReferenceType()))), + CGF.getContext().getDeclAlign(OrigVD))); + } + } } + (void)PreCondVars.apply(CGF); + // Emit init, __range and __end variables for C++ range loops. 
+ (void)OMPLoopBasedDirective::doForAllLoops( + LD->getInnermostCapturedStmt()->getCapturedStmt(), + /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(), + [&CGF](unsigned Cnt, const Stmt *CurStmt) { + if (const auto *CXXFor = dyn_cast(CurStmt)) { + if (const Stmt *Init = CXXFor->getInit()) + CGF.EmitStmt(Init); + CGF.EmitStmt(CXXFor->getRangeStmt()); + CGF.EmitStmt(CXXFor->getEndStmt()); + } + return false; + }); + PreInits = cast_or_null(LD->getPreInits()); + } else if (const auto *Tile = dyn_cast(&S)) { + PreInits = cast_or_null(Tile->getPreInits()); + } else { + llvm_unreachable("Unknown loop-based directive kind."); } - if (const auto *PreInits = cast_or_null(S.getPreInits())) { + if (PreInits) { for (const auto *I : PreInits->decls()) CGF.EmitVarDecl(cast(*I)); } @@ -184,7 +187,7 @@ } public: - OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) + OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) : CodeGenFunction::RunCleanupsScope(CGF) { emitPreInitStmt(CGF, S); } @@ -1755,6 +1758,31 @@ checkForLastprivateConditionalUpdate(*this, S); } +namespace { +/// RAII to handle scopes for loop transformation directives. 
+class OMPTransformDirectiveScopeRAII { + OMPLoopScope *Scope = nullptr; + CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; + CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; + +public: + OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { + if (const auto *Dir = dyn_cast(S)) { + Scope = new OMPLoopScope(CGF, *Dir); + CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); + CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); + } + } + ~OMPTransformDirectiveScopeRAII() { + if (!Scope) + return; + delete CapInfoRAII; + delete CGSI; + delete Scope; + } +}; +} // namespace + static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, int MaxLevel, int Level = 0) { assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); @@ -1771,6 +1799,10 @@ return; } if (SimplifiedS == NextLoop) { + OMPTransformDirectiveScopeRAII PossiblyTransformDirectiveScope(CGF, + SimplifiedS); + if (auto *Dir = dyn_cast(SimplifiedS)) + SimplifiedS = Dir->getTransformedStmt(); if (const auto *For = dyn_cast(SimplifiedS)) { S = For->getBody(); } else { @@ -1845,9 +1877,9 @@ D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); // Emit loop body. emitBody(*this, Body, - OMPLoopDirective::tryToFindNextInnerLoop( + OMPLoopBasedDirective::tryToFindNextInnerLoop( Body, /*TryImperfectlyNestedLoops=*/true), - D.getCollapsedNumber()); + D.getLoopsNumber()); // Jump to the dispatcher at the end of the loop body. 
if (IsInscanRegion) @@ -2062,8 +2094,7 @@ for (const auto *C : S.getClausesOfKind()) { if (!C->getNumForLoops()) continue; - for (unsigned I = S.getCollapsedNumber(), - E = C->getLoopNumIterations().size(); + for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); I < E; ++I) { const auto *DRE = cast(C->getLoopCounter(I)); const auto *VD = cast(DRE->getDecl()); @@ -2406,6 +2437,12 @@ checkForLastprivateConditionalUpdate(*this, S); } +void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { + // Emit the de-sugared statement. + OMPTransformDirectiveScopeRAII TileScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPOuterLoop( bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope, @@ -5341,6 +5378,7 @@ case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -6695,7 +6733,7 @@ for (const auto *C : D.getClausesOfKind()) { if (!C->getNumForLoops()) continue; - for (unsigned I = LD->getCollapsedNumber(), + for (unsigned I = LD->getLoopsNumber(), E = C->getLoopNumIterations().size(); I < E; ++I) { if (const auto *VD = dyn_cast( diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3401,6 +3401,7 @@ void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); + void EmitOMPTileDirective(const OMPTileDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPSectionsDirective(const OMPSectionsDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2153,6 +2153,7 @@ break; case OMPD_parallel: 
case OMPD_simd: + case OMPD_tile: case OMPD_task: case OMPD_taskyield: case OMPD_barrier: @@ -2387,6 +2388,7 @@ LLVM_FALLTHROUGH; case OMPD_parallel: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -2521,6 +2523,11 @@ HasAssociatedStatement = false; } + if (DKind == OMPD_tile && !FirstClauses[unsigned(OMPC_sizes)].getInt()) { + Diag(Loc, diag::err_omp_required_clause) + << getOpenMPDirectiveName(OMPD_tile) << "sizes"; + } + StmtResult AssociatedStmt; if (HasAssociatedStatement) { // The body is a block scope like in Lambdas and Blocks. @@ -2633,6 +2640,37 @@ return !IsCorrect; } +OMPClause *Parser::ParseOpenMPSizesClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SmallVector ValExprs; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + while (true) { + ExprResult Val = ParseConstantExpression(); + if (!Val.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ValExprs.push_back(Val.get()); + + if (Tok.is(tok::r_paren) || Tok.is(tok::annot_pragma_openmp_end)) + break; + + ExpectAndConsume(tok::comma); + } + + T.consumeClose(); + + return Actions.ActOnOpenMPSizesClause( + ValExprs, ClauseNameLoc, T.getOpenLocation(), T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) { SourceLocation Loc = Tok.getLocation(); ConsumeAnyToken(); @@ -2870,6 +2908,15 @@ case OMPC_affinity: Clause = ParseOpenMPVarListClause(DKind, CKind, WrongDirective); break; + case OMPC_sizes: + if (!FirstClause) { + Diag(Tok, diag::err_omp_more_one_clause) + << getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0; + ErrorFound = true; + } + + Clause = ParseOpenMPSizesClause(); + break; case OMPC_uses_allocators: Clause = ParseOpenMPUsesAllocatorClause(DKind); break; diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp 
b/clang/lib/Sema/SemaExceptionSpec.cpp --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1458,6 +1458,7 @@ case Stmt::OMPSectionDirectiveClass: case Stmt::OMPSectionsDirectiveClass: case Stmt::OMPSimdDirectiveClass: + case Stmt::OMPTileDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -3421,7 +3421,8 @@ if (S->getDirectiveKind() == OMPD_atomic || S->getDirectiveKind() == OMPD_critical || S->getDirectiveKind() == OMPD_section || - S->getDirectiveKind() == OMPD_master) { + S->getDirectiveKind() == OMPD_master || + isOpenMPLoopTransformationDirective(S->getDirectiveKind())) { Visit(S->getAssociatedStmt()); return; } @@ -3792,6 +3793,12 @@ // Check implicitly captured variables. VisitSubCaptures(S); } + + void VisitOMPTileDirective(OMPTileDirective *S) { + // #pragma omp tile does not introduce data sharing. 
+ VisitStmt(S); + } + void VisitStmt(Stmt *S) { for (Stmt *C : S->children()) { if (C) { @@ -3956,6 +3963,7 @@ case OMPD_critical: case OMPD_section: case OMPD_master: + case OMPD_tile: break; case OMPD_simd: case OMPD_for: @@ -4478,6 +4486,10 @@ } } DSAStack->setForceVarCapturing(/*V=*/false); + } else if (isOpenMPLoopTransformationDirective( + DSAStack->getCurrentDirective())) { + assert(CaptureRegions.empty() && + "No captured regions in loop transformation directives."); } else if (CaptureRegions.size() > 1 || CaptureRegions.back() != OMPD_unknown) { if (auto *C = OMPClauseWithPreInit::get(Clause)) @@ -5153,7 +5165,8 @@ bool ErrorFound = false; ClausesWithImplicit.append(Clauses.begin(), Clauses.end()); if (AStmt && !CurContext->isDependentContext() && Kind != OMPD_atomic && - Kind != OMPD_critical && Kind != OMPD_section && Kind != OMPD_master) { + Kind != OMPD_critical && Kind != OMPD_section && Kind != OMPD_master && + !isOpenMPLoopTransformationDirective(Kind)) { assert(isa(AStmt) && "Captured statement expected"); // Check default data sharing attributes for referenced variables. @@ -5271,6 +5284,10 @@ if (LangOpts.OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; + case OMPD_tile: + Res = + ActOnOpenMPTileDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -5621,6 +5638,7 @@ case OMPC_collapse: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_default: case OMPC_proc_bind: case OMPC_private: @@ -6585,6 +6603,8 @@ class OpenMPIterationSpaceChecker { /// Reference to Sema. Sema &SemaRef; + /// Does the loop associated directive support non-rectangular loops? + bool SupportsNonRectangular; /// Data-sharing stack. DSAStackTy &Stack; /// A location for diagnostics (when there is no some better location). 
@@ -6633,10 +6653,10 @@ Expr *Condition = nullptr; public: - OpenMPIterationSpaceChecker(Sema &SemaRef, DSAStackTy &Stack, - SourceLocation DefaultLoc) - : SemaRef(SemaRef), Stack(Stack), DefaultLoc(DefaultLoc), - ConditionLoc(DefaultLoc) {} + OpenMPIterationSpaceChecker(Sema &SemaRef, bool SupportsNonRectangular, + DSAStackTy &Stack, SourceLocation DefaultLoc) + : SemaRef(SemaRef), SupportsNonRectangular(SupportsNonRectangular), + Stack(Stack), DefaultLoc(DefaultLoc), ConditionLoc(DefaultLoc) {} /// Check init-expr for canonical loop form and save loop counter /// variable - #Var and its initialization value - #LB. bool checkAndSetInit(Stmt *S, bool EmitDiags = true); @@ -6840,6 +6860,7 @@ const ValueDecl *DepDecl = nullptr; const ValueDecl *PrevDepDecl = nullptr; bool IsInitializer = true; + bool SupportsNonRectangular; unsigned BaseLoopId = 0; bool checkDecl(const Expr *E, const ValueDecl *VD) { if (getCanonicalDecl(VD) == getCanonicalDecl(CurLCDecl)) { @@ -6862,6 +6883,10 @@ SemaRef.Diag(VD->getLocation(), diag::note_previous_decl) << VD; return false; } + if (Data.first && !SupportsNonRectangular) { + SemaRef.Diag(E->getExprLoc(), diag::err_omp_invariant_dependency); + return false; + } if (Data.first && (DepDecl || (PrevDepDecl && getCanonicalDecl(VD) != getCanonicalDecl(PrevDepDecl)))) { @@ -6906,9 +6931,11 @@ } explicit LoopCounterRefChecker(Sema &SemaRef, DSAStackTy &Stack, const ValueDecl *CurLCDecl, bool IsInitializer, - const ValueDecl *PrevDepDecl = nullptr) + const ValueDecl *PrevDepDecl = nullptr, + bool SupportsNonRectangular = true) : SemaRef(SemaRef), Stack(Stack), CurLCDecl(CurLCDecl), - PrevDepDecl(PrevDepDecl), IsInitializer(IsInitializer) {} + PrevDepDecl(PrevDepDecl), IsInitializer(IsInitializer), + SupportsNonRectangular(SupportsNonRectangular) {} unsigned getBaseLoopId() const { assert(CurLCDecl && "Expected loop dependency."); return BaseLoopId; @@ -6925,7 +6952,7 @@ bool IsInitializer) { // Check for the non-rectangular loops. 
LoopCounterRefChecker LoopStmtChecker(SemaRef, Stack, LCDecl, IsInitializer, - DepDecl); + DepDecl, SupportsNonRectangular); if (LoopStmtChecker.Visit(S)) { DepDecl = LoopStmtChecker.getDepDecl(); return LoopStmtChecker.getBaseLoopId(); @@ -7578,9 +7605,9 @@ if (!Upper || !Lower) return nullptr; - ExprResult Diff = - calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, - TestIsStrictOp, /*RoundToStep=*/true, Captures); + ExprResult Diff = calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, + Step, VarType, TestIsStrictOp, + /*RoundToStep=*/true, Captures); if (!Diff.isUsable()) return nullptr; @@ -7656,9 +7683,9 @@ // Build minimum/maximum value based on number of iterations. QualType VarType = LCDecl->getType().getNonReferenceType(); - ExprResult Diff = - calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, - TestIsStrictOp, /*RoundToStep=*/false, Captures); + ExprResult Diff = calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, + Step, VarType, TestIsStrictOp, + /*RoundToStep=*/false, Captures); if (!Diff.isUsable()) return std::make_pair(nullptr, nullptr); @@ -7849,9 +7876,9 @@ if (!Upper || !Lower) return nullptr; - ExprResult Diff = calculateNumIters(SemaRef, S, DefaultLoc, Lower, Upper, - Step, VarType, /*TestIsStrictOp=*/false, - /*RoundToStep=*/false, Captures); + ExprResult Diff = calculateNumIters( + SemaRef, S, DefaultLoc, Lower, Upper, Step, VarType, + /*TestIsStrictOp=*/false, /*RoundToStep=*/false, Captures); if (!Diff.isUsable()) return nullptr; @@ -7866,7 +7893,8 @@ if (AssociatedLoops > 0 && isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { DSAStack->loopStart(); - OpenMPIterationSpaceChecker ISC(*this, *DSAStack, ForLoc); + OpenMPIterationSpaceChecker ISC(*this, /*SupportsNonRectangular=*/true, + *DSAStack, ForLoc); if (!ISC.checkAndSetInit(Init, /*EmitDiags=*/false)) { if (ValueDecl *D = ISC.getLoopDecl()) { auto *VD = dyn_cast(D); @@ -7951,6 +7979,7 @@ Sema::VarsWithInheritedDSAType 
&VarsWithImplicitDSA, llvm::MutableArrayRef ResultIterSpaces, llvm::MapVector &Captures) { + bool SupportsNonRectangular = !isOpenMPLoopTransformationDirective(DKind); // OpenMP [2.9.1, Canonical Loop Form] // for (init-expr; test-expr; incr-expr) structured-block // for (range-decl: range-expr) structured-block @@ -7982,7 +8011,7 @@ assert(((For && For->getBody()) || (CXXFor && CXXFor->getBody())) && "No loop body."); - OpenMPIterationSpaceChecker ISC(SemaRef, DSA, + OpenMPIterationSpaceChecker ISC(SemaRef, SupportsNonRectangular, DSA, For ? For->getForLoc() : CXXFor->getForLoc()); // Check init. @@ -8038,7 +8067,8 @@ ISC.buildNumIterations(DSA.getCurScope(), ResultIterSpaces, (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || - isOpenMPDistributeDirective(DKind)), + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)), Captures); ResultIterSpaces[CurrentNestedLoopCount].CounterVar = ISC.buildCounterVar(Captures, DSA); @@ -8295,8 +8325,11 @@ Expr *OrderedLoopCountExpr, Stmt *AStmt, Sema &SemaRef, DSAStackTy &DSA, Sema::VarsWithInheritedDSAType &VarsWithImplicitDSA, - OMPLoopDirective::HelperExprs &Built) { + OMPLoopBasedDirective::HelperExprs &Built) { unsigned NestedLoopCount = 1; + bool SupportsNonPerfectlyNested = (SemaRef.LangOpts.OpenMP >= 50) && + !isOpenMPLoopTransformationDirective(DKind); + if (CollapseLoopCountExpr) { // Found 'collapse' clause - calculate collapse number. Expr::EvalResult Result; @@ -8333,58 +8366,30 @@ // This is helper routine for loop directives (e.g., 'for', 'simd', // 'for simd', etc.). 
llvm::MapVector Captures; - SmallVector IterSpaces( - std::max(OrderedLoopCount, NestedLoopCount)); - Stmt *CurStmt = AStmt->IgnoreContainers(/* IgnoreCaptured */ true); - for (unsigned Cnt = 0; Cnt < NestedLoopCount; ++Cnt) { - if (checkOpenMPIterationSpace( - DKind, CurStmt, SemaRef, DSA, Cnt, NestedLoopCount, - std::max(OrderedLoopCount, NestedLoopCount), CollapseLoopCountExpr, - OrderedLoopCountExpr, VarsWithImplicitDSA, IterSpaces, Captures)) - return 0; - // Move on to the next nested for loop, or to the loop body. - // OpenMP [2.8.1, simd construct, Restrictions] - // All loops associated with the construct must be perfectly nested; that - // is, there must be no intervening code nor any OpenMP directive between - // any two loops. - if (auto *For = dyn_cast(CurStmt)) { - CurStmt = For->getBody(); - } else { - assert(isa(CurStmt) && - "Expected canonical for or range-based for loops."); - CurStmt = cast(CurStmt)->getBody(); - } - CurStmt = OMPLoopDirective::tryToFindNextInnerLoop( - CurStmt, SemaRef.LangOpts.OpenMP >= 50); - } - for (unsigned Cnt = NestedLoopCount; Cnt < OrderedLoopCount; ++Cnt) { - if (checkOpenMPIterationSpace( - DKind, CurStmt, SemaRef, DSA, Cnt, NestedLoopCount, - std::max(OrderedLoopCount, NestedLoopCount), CollapseLoopCountExpr, - OrderedLoopCountExpr, VarsWithImplicitDSA, IterSpaces, Captures)) - return 0; - if (Cnt > 0 && IterSpaces[Cnt].CounterVar) { - // Handle initialization of captured loop iterator variables. - auto *DRE = cast(IterSpaces[Cnt].CounterVar); - if (isa(DRE->getDecl())) { - Captures[DRE] = DRE; - } - } - // Move on to the next nested for loop, or to the loop body. - // OpenMP [2.8.1, simd construct, Restrictions] - // All loops associated with the construct must be perfectly nested; that - // is, there must be no intervening code nor any OpenMP directive between - // any two loops. 
- if (auto *For = dyn_cast(CurStmt)) { - CurStmt = For->getBody(); - } else { - assert(isa(CurStmt) && - "Expected canonical for or range-based for loops."); - CurStmt = cast(CurStmt)->getBody(); - } - CurStmt = OMPLoopDirective::tryToFindNextInnerLoop( - CurStmt, SemaRef.LangOpts.OpenMP >= 50); - } + unsigned NumLoops = std::max(OrderedLoopCount, NestedLoopCount); + SmallVector IterSpaces(NumLoops); + if (!OMPLoopBasedDirective::doForAllLoops( + AStmt->IgnoreContainers(!isOpenMPLoopTransformationDirective(DKind)), + SupportsNonPerfectlyNested, NumLoops, + [DKind, &SemaRef, &DSA, NumLoops, NestedLoopCount, + CollapseLoopCountExpr, OrderedLoopCountExpr, &VarsWithImplicitDSA, + &IterSpaces, &Captures](unsigned Cnt, Stmt *CurStmt) { + if (checkOpenMPIterationSpace( + DKind, CurStmt, SemaRef, DSA, Cnt, NestedLoopCount, + NumLoops, CollapseLoopCountExpr, OrderedLoopCountExpr, + VarsWithImplicitDSA, IterSpaces, Captures)) + return true; + if (Cnt > 0 && Cnt >= NestedLoopCount && + IterSpaces[Cnt].CounterVar) { + // Handle initialization of captured loop iterator variables. + auto *DRE = cast(IterSpaces[Cnt].CounterVar); + if (isa(DRE->getDecl())) { + Captures[DRE] = DRE; + } + } + return false; + })) + return 0; Built.clear(/* size */ NestedLoopCount); @@ -8532,7 +8537,8 @@ // Build variables passed into runtime, necessary for worksharing directives. ExprResult LB, UB, IL, ST, EUB, CombLB, CombUB, PrevLB, PrevUB, CombEUB; if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || - isOpenMPDistributeDirective(DKind)) { + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) { // Lower bound variable, initialized with zero. 
VarDecl *LBDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.lb"); LB = buildDeclRefExpr(SemaRef, LBDecl, VType, InitLoc); @@ -8630,11 +8636,12 @@ { VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv"); IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc); - Expr *RHS = - (isOpenMPWorksharingDirective(DKind) || - isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)) - ? LB.get() - : SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get(); + Expr *RHS = (isOpenMPWorksharingDirective(DKind) || + isOpenMPTaskLoopDirective(DKind) || + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) + ? LB.get() + : SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get(); Init = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, IV.get(), RHS); Init = SemaRef.ActOnFinishFullExpr(Init.get(), /*DiscardedValue*/ false); @@ -8672,7 +8679,8 @@ } ExprResult Cond = (isOpenMPWorksharingDirective(DKind) || - isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)) + isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) ? SemaRef.BuildBinOp(CurScope, CondLoc, UseStrictCompare ? BO_LT : BO_LE, IV.get(), BoundUB) @@ -8720,7 +8728,8 @@ // base variables for the update ExprResult NextLB, NextUB, CombNextLB, CombNextUB; if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || - isOpenMPDistributeDirective(DKind)) { + isOpenMPDistributeDirective(DKind) || + isOpenMPLoopTransformationDirective(DKind)) { // LB + ST NextLB = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, LB.get(), ST.get()); if (!NextLB.isUsable()) @@ -9046,7 +9055,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( @@ -9085,7 +9094,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -9121,7 +9130,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -9318,7 +9327,7 @@ // longjmp() and throw() must not violate the entry/exit criteria. CS->getCapturedDecl()->setNothrow(); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -9362,7 +9371,7 @@ // longjmp() and throw() must not violate the entry/exit criteria. CS->getCapturedDecl()->setNothrow(); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -10556,7 +10565,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -10846,7 +10855,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. 
unsigned NestedLoopCount = @@ -10883,7 +10892,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -10932,7 +10941,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -10969,7 +10978,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -11037,7 +11046,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11093,7 +11102,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11142,7 +11151,7 @@ return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. 
unsigned NestedLoopCount = @@ -11185,7 +11194,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11229,7 +11238,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11285,7 +11294,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -11341,7 +11350,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11396,7 +11405,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will define the // nested loops number. unsigned NestedLoopCount = @@ -11452,7 +11461,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -11498,7 +11507,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( @@ -11560,7 +11569,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11622,7 +11631,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11701,7 +11710,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11744,7 +11753,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( @@ -11799,7 +11808,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. unsigned NestedLoopCount = @@ -11858,7 +11867,7 @@ CS->getCapturedDecl()->setNothrow(); } - OMPLoopDirective::HelperExprs B; + OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( @@ -11890,6 +11899,234 @@ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { + auto SizesClauses = + OMPExecutableDirective::getClausesOfKind(Clauses); + if (SizesClauses.empty()) { + // A missing 'sizes' clause is already reported by the parser. + return StmtError(); + } + const OMPSizesClause *SizesClause = *SizesClauses.begin(); + unsigned NumLoops = SizesClause->getNumSizes(); + + // Empty statement should only be possible if there already was an error. + if (!AStmt) + return StmtError(); + + // Verify and diagnose loop nest. + SmallVector LoopHelpers(NumLoops); + Stmt *Body = nullptr; + SmallVector OriginalInits; + if (!OMPLoopBasedDirective::doForAllLoops( + AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, + NumLoops, + [this, &LoopHelpers, &Body, &OriginalInits](unsigned Cnt, + Stmt *CurStmt) { + VarsWithInheritedDSAType TmpDSA; + unsigned SingleNumLoops = + checkOpenMPLoop(OMPD_tile, nullptr, nullptr, CurStmt, *this, + *DSAStack, TmpDSA, LoopHelpers[Cnt]); + if (SingleNumLoops == 0) + return true; + assert(SingleNumLoops == 1 && "Expect single loop iteration space"); + if (auto *For = dyn_cast(CurStmt)) { + OriginalInits.push_back(For->getInit()); + Body = For->getBody(); + } else { + assert(isa(CurStmt) && + "Expected canonical for or range-based for loops."); + auto *CXXFor = cast(CurStmt); + OriginalInits.push_back(CXXFor->getBeginStmt()); + Body = CXXFor->getBody(); + } + return false; + })) + return StmtError(); + + // Delay tiling to when template is completely instantiated. + if (CurContext->isDependentContext()) + return OMPTileDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, nullptr, nullptr); + + // Collection of generated variable declarations. + SmallVector PreInits; + + // Create iteration variables for the generated loops. 
+ SmallVector FloorIndVars; + SmallVector TileIndVars; + FloorIndVars.resize(NumLoops); + TileIndVars.resize(NumLoops); + for (unsigned I = 0; I < NumLoops; ++I) { + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; + if (auto *PI = cast_or_null(LoopHelper.PreInits)) + PreInits.append(PI->decl_begin(), PI->decl_end()); + assert(LoopHelper.Counters.size() == 1 && + "Expect single-dimensional loop iteration space"); + auto *OrigCntVar = cast(LoopHelper.Counters.front()); + std::string OrigVarName = OrigCntVar->getNameInfo().getAsString(); + DeclRefExpr *IterVarRef = cast(LoopHelper.IterationVarRef); + QualType CntTy = IterVarRef->getType(); + + // Iteration variable for the floor (i.e. outer) loop. + { + std::string FloorCntName = + (Twine(".floor_") + llvm::utostr(I) + ".iv." + OrigVarName).str(); + VarDecl *FloorCntDecl = + buildVarDecl(*this, {}, CntTy, FloorCntName, nullptr, OrigCntVar); + FloorIndVars[I] = FloorCntDecl; + } + + // Iteration variable for the tile (i.e. inner) loop. + { + std::string TileCntName = + (Twine(".tile_") + llvm::utostr(I) + ".iv." + OrigVarName).str(); + + // Reuse the iteration variable created by checkOpenMPLoop. It is also + // used by the expressions to derive the original iteration variable's + // value from the logical iteration number. + auto *TileCntDecl = cast(IterVarRef->getDecl()); + TileCntDecl->setDeclName(&PP.getIdentifierTable().get(TileCntName)); + TileIndVars[I] = TileCntDecl; + } + if (auto *PI = dyn_cast_or_null(OriginalInits[I])) + PreInits.append(PI->decl_begin(), PI->decl_end()); + // Gather declarations for the data members used as counters. + for (Expr *CounterRef : LoopHelper.Counters) { + auto *CounterDecl = cast(CounterRef)->getDecl(); + if (isa(CounterDecl)) + PreInits.push_back(CounterDecl); + } + } + + // Once the original iteration values are set, append the innermost body. + Stmt *Inner = Body; + + // Create tile loops from the inside to the outside. 
+ for (int I = NumLoops - 1; I >= 0; --I) { + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; + Expr *NumIterations = LoopHelper.NumIterations; + auto *OrigCntVar = cast(LoopHelper.Counters[0]); + QualType CntTy = OrigCntVar->getType(); + Expr *DimTileSize = SizesClause->getSizesRefs()[I]; + Scope *CurScope = getCurScope(); + + // Commonly used variables. + DeclRefExpr *TileIV = buildDeclRefExpr(*this, TileIndVars[I], CntTy, + OrigCntVar->getExprLoc()); + DeclRefExpr *FloorIV = buildDeclRefExpr(*this, FloorIndVars[I], CntTy, + OrigCntVar->getExprLoc()); + + // For init-statement: auto .tile.iv = .floor.iv + AddInitializerToDecl(TileIndVars[I], DefaultLvalueConversion(FloorIV).get(), + /*DirectInit=*/false); + Decl *CounterDecl = TileIndVars[I]; + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), + OrigCntVar->getBeginLoc(), OrigCntVar->getEndLoc()); + if (!InitStmt.isUsable()) + return StmtError(); + + // For cond-expression: .tile.iv < min(.floor.iv + DimTileSize, + // NumIterations) + ExprResult EndOfTile = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), + BO_Add, FloorIV, DimTileSize); + if (!EndOfTile.isUsable()) + return StmtError(); + ExprResult IsPartialTile = + BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + NumIterations, EndOfTile.get()); + if (!IsPartialTile.isUsable()) + return StmtError(); + ExprResult MinTileAndIterSpace = ActOnConditionalOp( + LoopHelper.Cond->getBeginLoc(), LoopHelper.Cond->getEndLoc(), + IsPartialTile.get(), NumIterations, EndOfTile.get()); + if (!MinTileAndIterSpace.isUsable()) + return StmtError(); + ExprResult CondExpr = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), + BO_LT, TileIV, MinTileAndIterSpace.get()); + if (!CondExpr.isUsable()) + return StmtError(); + + // For incr-statement: ++.tile.iv + ExprResult IncrStmt = + BuildUnaryOp(CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, TileIV); + if (!IncrStmt.isUsable()) + return StmtError(); + 
+ // Statements to set the original iteration variable's value from the + // logical iteration number. + // Generated for loop is: + // Original_for_init; + // for (auto .tile.iv = .floor.iv; .tile.iv < min(.floor.iv + DimTileSize, + // NumIterations); ++.tile.iv) { + // Original_Body; + // Original_counter_update; + // } + // FIXME: If the innermost body is a loop itself, inserting these + // statements stops it being recognized as a perfectly nested loop (e.g. + // for applying tiling again). If this is the case, sink the expressions + // further into the inner loop. + SmallVector BodyParts; + BodyParts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end()); + BodyParts.push_back(Inner); + Inner = CompoundStmt::Create(Context, BodyParts, Inner->getBeginLoc(), + Inner->getEndLoc()); + Inner = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, + IncrStmt.get(), Inner, LoopHelper.Init->getBeginLoc(), + LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); + } + + // Create floor loops from the inside to the outside. + for (int I = NumLoops - 1; I >= 0; --I) { + auto &LoopHelper = LoopHelpers[I]; + Expr *NumIterations = LoopHelper.NumIterations; + DeclRefExpr *OrigCntVar = cast(LoopHelper.Counters[0]); + QualType CntTy = OrigCntVar->getType(); + Expr *DimTileSize = SizesClause->getSizesRefs()[I]; + Scope *CurScope = getCurScope(); + + // Commonly used variables. 
+ DeclRefExpr *FloorIV = buildDeclRefExpr(*this, FloorIndVars[I], CntTy, + OrigCntVar->getExprLoc()); + + // For init-statement: auto .floor.iv = 0 + AddInitializerToDecl( + FloorIndVars[I], + ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(), + /*DirectInit=*/false); + Decl *CounterDecl = FloorIndVars[I]; + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), + OrigCntVar->getBeginLoc(), OrigCntVar->getEndLoc()); + if (!InitStmt.isUsable()) + return StmtError(); + + // For cond-expression: .floor.iv < NumIterations + ExprResult CondExpr = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), + BO_LT, FloorIV, NumIterations); + if (!CondExpr.isUsable()) + return StmtError(); + + // For incr-statement: .floor.iv += DimTileSize + ExprResult IncrStmt = BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), + BO_AddAssign, FloorIV, DimTileSize); + if (!IncrStmt.isUsable()) + return StmtError(); + + Inner = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, + IncrStmt.get(), Inner, LoopHelper.Init->getBeginLoc(), + LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); + } + + return OMPTileDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, + AStmt, Inner, + buildPreInits(Context, PreInits)); +} + OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -11961,6 +12198,7 @@ case OMPC_untied: case OMPC_mergeable: case OMPC_threadprivate: + case OMPC_sizes: case OMPC_allocate: case OMPC_flush: case OMPC_read: @@ -12141,6 +12379,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_teams: + case OMPD_tile: case OMPD_for: case OMPD_sections: case OMPD_section: @@ -12217,6 +12456,7 @@ case OMPD_end_declare_target: case OMPD_teams: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12296,6 +12536,7 @@ case OMPD_declare_target: case 
OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12373,6 +12614,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12451,6 +12693,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -12528,6 +12771,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12604,6 +12848,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12683,6 +12928,7 @@ case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_simd: + case OMPD_tile: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -12712,6 +12958,7 @@ case OMPC_proc_bind: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_private: @@ -13145,6 +13392,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_schedule: @@ -13336,6 +13584,22 @@ EndLoc); } +OMPClause *Sema::ActOnOpenMPSizesClause(ArrayRef SizeExprs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + for (Expr *SizeExpr : SizeExprs) { + ExprResult NumForLoopsResult = VerifyPositiveIntegerConstantInClause( + SizeExpr, OMPC_sizes, /*StrictlyPositive=*/true); + if (!NumForLoopsResult.isUsable()) + return nullptr; + } + + DSAStack->setAssociatedLoops(SizeExprs.size()); + return OMPSizesClause::Create(Context, StartLoc, LParenLoc, EndLoc, + SizeExprs); +} + OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( OpenMPClauseKind Kind, ArrayRef Argument, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -13383,6 
+13647,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_default: @@ -13638,6 +13903,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_schedule: @@ -13920,6 +14186,7 @@ case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: case OMPC_default: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1618,6 +1618,13 @@ return getSema().ActOnOpenMPSimdlenClause(Len, StartLoc, LParenLoc, EndLoc); } + OMPClause *RebuildOMPSizesClause(ArrayRef Sizes, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. 
@@ -8348,7 +8355,7 @@ D->getDirectiveKind() == OMPD_master) CS = D->getAssociatedStmt(); else - CS = D->getInnermostCapturedStmt()->getCapturedStmt(); + CS = D->getRawStmt(); Body = getDerived().TransformStmt(CS); } AssociatedStmt = @@ -8401,6 +8408,17 @@ return Res; } +template +StmtResult +TreeTransform::TransformOMPTileDirective(OMPTileDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName, + nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { @@ -9043,6 +9061,31 @@ E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause *TreeTransform::TransformOMPSizesClause(OMPSizesClause *C) { + SmallVector TransformedSizes; + TransformedSizes.reserve(C->getNumSizes()); + bool Changed = false; + for (Expr *E : C->getSizesRefs()) { + if (!E) { + TransformedSizes.push_back(nullptr); + continue; + } + + ExprResult T = getDerived().TransformExpr(E); + if (T.isInvalid()) + return nullptr; + if (E != T.get()) + Changed = true; + TransformedSizes.push_back(T.get()); + } + + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + return RebuildOMPSizesClause(TransformedSizes, C->getBeginLoc(), + C->getLParenLoc(), C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11730,6 +11730,11 @@ case llvm::omp::OMPC_simdlen: C = new (Context) OMPSimdlenClause(); break; + case llvm::omp::OMPC_sizes: { + unsigned NumSizes = Record.readInt(); + C = OMPSizesClause::CreateEmpty(Context, NumSizes); + break; + } case llvm::omp::OMPC_allocator: C = new (Context) 
OMPAllocatorClause(); break; @@ -12021,6 +12026,12 @@ C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) { + for (Expr *&E : C->getSizesRefs()) + E = Record.readSubExpr(); + C->setLParenLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2279,13 +2279,17 @@ E->setLocEnd(readSourceLocation()); } -void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) { +void ASTStmtReader::VisitOMPLoopBasedDirective(OMPLoopBasedDirective *D) { VisitStmt(D); // Field CollapsedNum was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); } +void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) { + VisitOMPLoopBasedDirective(D); +} + void ASTStmtReader::VisitOMPParallelDirective(OMPParallelDirective *D) { VisitStmt(D); VisitOMPExecutableDirective(D); @@ -2296,6 +2300,10 @@ VisitOMPLoopDirective(D); } +void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) { + VisitOMPLoopBasedDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3145,6 +3153,13 @@ break; } + case STMT_OMP_TILE_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPTileDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } + case STMT_OMP_FOR_DIRECTIVE: { unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- 
a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6138,6 +6138,13 @@ Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) { + Record.push_back(C->getNumSizes()); + for (Expr *Size : C->getSizesRefs()) + Record.AddStmt(Size); + Record.AddSourceLocation(C->getLParenLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2176,12 +2176,16 @@ Record.AddSourceLocation(E->getEndLoc()); } -void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) { +void ASTStmtWriter::VisitOMPLoopBasedDirective(OMPLoopBasedDirective *D) { VisitStmt(D); - Record.writeUInt32(D->getCollapsedNumber()); + Record.writeUInt32(D->getLoopsNumber()); VisitOMPExecutableDirective(D); } +void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) { + VisitOMPLoopBasedDirective(D); +} + void ASTStmtWriter::VisitOMPParallelDirective(OMPParallelDirective *D) { VisitStmt(D); VisitOMPExecutableDirective(D); @@ -2194,6 +2198,11 @@ Code = serialization::STMT_OMP_SIMD_DIRECTIVE; } +void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) { + VisitOMPLoopBasedDirective(D); + Code = serialization::STMT_OMP_TILE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1292,6 +1292,7 @@ case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass: case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass: case 
Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: + case Stmt::OMPTileDirectiveClass: case Stmt::CapturedStmtClass: { const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState()); Engine.addAbortedBlock(node, currBldrCtx->getBlock()); diff --git a/clang/test/Index/openmp-tile.c b/clang/test/Index/openmp-tile.c new file mode 100644 --- /dev/null +++ b/clang/test/Index/openmp-tile.c @@ -0,0 +1,11 @@ +// RUN: c-index-test -test-load-source local %s -fopenmp -fopenmp-version=51 | FileCheck %s + +void test() { +#pragma omp tile sizes(5) + for (int i = 0; i < 65; i += 1) + ; +} + +// CHECK: openmp-tile.c:4:1: OMPTileDirective= Extent=[4:1 - 4:26] +// CHECK: openmp-tile.c:4:24: IntegerLiteral= Extent=[4:24 - 4:25] +// CHECK: openmp-tile.c:5:3: ForStmt= Extent=[5:3 - 6:6] diff --git a/clang/test/OpenMP/tile_ast_print.cpp b/clang/test/OpenMP/tile_ast_print.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/tile_ast_print.cpp @@ -0,0 +1,165 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. 
+extern "C" void body(...); + + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp tile sizes(5, 5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5,5) + // PRINT: for (int i = 7; i < 17; i += 3) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 3) + // PRINT: for (int j = 7; j < 17; j += 3) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 3) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2(int start1, int start2, int end1, int end2) { + // PRINT: #pragma omp tile sizes(5, 5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5,5) + // PRINT: for (int i = start1; i < end1; i += 1) + // DUMP-NEXT: ForStmt + for (int i = start1; i < end1; i += 1) + // PRINT: for (int j = start2; j < end2; j += 1) + // DUMP: ForStmt + for (int j = start2; j < end2; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo3( +// DUMP-LABEL: FunctionDecl {{.*}} foo3 +void foo3() { + // PRINT: #pragma omp for + // DUMP: OMPForDirective + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for + // PRINT: #pragma omp tile sizes(5) + // DUMP-NEXT: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5) + for (int i = 7; i < 17; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo4( +// DUMP-LABEL: FunctionDecl {{.*}} foo4 +void foo4() { + // PRINT: #pragma omp for collapse(3) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 3 + // DUMP-NEXT: IntegerLiteral {{.*}} 3 + // DUMP-NEXT: 
CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(3) + // PRINT: #pragma omp tile sizes(5, 5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5, 5) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5(int start, int end, int step) { + // PRINT: #pragma omp for collapse(2) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 2 + // DUMP-NEXT: IntegerLiteral {{.*}} 2 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(2) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: #pragma omp tile sizes(5) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp tile sizes(5) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP-NEXT: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo6 +template +void foo6(T start, T end) { + // PRINT: #pragma omp tile sizes(Tile) + // DUMP: OMPTileDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: DeclRefExpr {{.*}} 'Tile' 'T' + #pragma omp tile sizes(Tile) + // PRINT-NEXT: for (T i = start; i < end; i += Step) + // DUMP-NEXT: ForStmt + for (T i = start; i < end; i += Step) + // PRINT-NEXT: body(i); + // DUMP: CallExpr + body(i); +} + +// Also test instantiating the template. 
+void tfoo6() { + foo6(0, 42); +} + + +#endif diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -0,0 +1,1026 @@ +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...) {} + +// IR: define {{.*}}void @_ZN1SC2Ev(%struct.S* +// IR: [[THIS_ADDR:%.+]] = alloca %struct.S*, align 8 +// IR-NEXT: [[I_REF:%.+]] = alloca i32*, align 8 +// IR-NEXT: [[FLOOR:%.+]] = alloca i32, align 4 +// IR-NEXT: [[TILE:%.+]] = alloca i32, align 4 +// IR-NEXT: store %struct.S* %{{.+}}, %struct.S** [[THIS_ADDR]], align 8 +// IR-NEXT: [[THIS:%.+]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// IR-NEXT: [[I:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 0 +// IR-NEXT: store i32* [[I]], i32** [[I_REF]], align 8 +// IR-NEXT: store i32 0, i32* [[FLOOR]], align 4 +// IR-NEXT: br label %[[FOR_COND:.+]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP0:%.+]] = load i32, i32* [[FLOOR]], align 4 +// IR-NEXT: [[CMP:%.+]] = icmp slt i32 [[TMP0]], 4 +// IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END11:.+]] +// IR: [[FOR_BODY]]: +// IR-NEXT: [[TMP1:%.+]] = load i32, i32* [[FLOOR]], align 4 +// IR-NEXT: store i32 [[TMP1]], i32* [[TILE]], align 4 +// IR-NEXT: br label %[[FOR_COND3:.+]] +// IR: [[FOR_COND3]]: +// IR-NEXT: [[TMP2:%.+]] = load i32, i32* [[TILE]], align 4 +// IR-NEXT: [[TMP3:%.+]] = load i32, i32* 
[[FLOOR]], align 4 +// IR-NEXT: [[ADD:%.+]] = add nsw i32 [[TMP3]], 5 +// IR-NEXT: [[CMP4:%.+]] = icmp slt i32 4, [[ADD]] +// IR-NEXT: br i1 [[CMP4]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] +// IR: [[COND_TRUE]]: +// IR-NEXT: br label %[[COND_END:.+]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP4:%.+]] = load i32, i32* [[FLOOR]], align 4 +// IR-NEXT: [[ADD5:%.+]] = add nsw i32 [[TMP4]], 5 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.+]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD5]], %[[COND_FALSE]] ] +// IR-NEXT: [[CMP6:%.+]] = icmp slt i32 [[TMP2]], [[COND]] +// IR-NEXT: br i1 [[CMP6]], label %[[FOR_BODY7:.+]], label %[[FOR_END:.+]] +// IR: [[FOR_BODY7]]: +// IR-NEXT: [[TMP5:%.+]] = load i32, i32* [[TILE]], align 4 +// IR-NEXT: [[MUL:%.+]] = mul nsw i32 [[TMP5]], 3 +// IR-NEXT: [[ADD8:%.+]] = add nsw i32 7, [[MUL]] +// IR-NEXT: [[TMP6:%.+]] = load i32*, i32** [[I_REF]], align 8 +// IR-NEXT: store i32 [[ADD8]], i32* [[TMP6]], align 4 +// IR-NEXT: [[TMP7:%.+]] = load i32*, i32** [[I_REF]], align 8 +// IR-NEXT: [[TMP8:%.+]] = load i32, i32* [[TMP7]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP8]]) +// IR-NEXT: br label %[[FOR_INC:.+]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP9:%.+]] = load i32, i32* [[TILE]], align 4 +// IR-NEXT: [[INC:%.+]] = add nsw i32 [[TMP9]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[TILE]], align 4 +// IR-NEXT: br label %[[FOR_COND3]] +// IR: [[FOR_END]]: +// IR-NEXT: br label %[[FOR_INC9:.+]] +// IR: [[FOR_INC9]]: +// IR-NEXT: [[TMP10:%.+]] = load i32, i32* [[FLOOR]], align 4 +// IR-NEXT: [[ADD10:%.+]] = add nsw i32 [[TMP10]], 5 +// IR-NEXT: store i32 [[ADD10]], i32* [[FLOOR]], align 4 +// IR-NEXT: br label %[[FOR_COND]] +// IR: [[FOR_END11]]: +// IR-NEXT: ret void + +struct S { + int i; + S() { +#pragma omp tile sizes(5) + for (i = 7; i < 17; i += 3) + body(i); + } +} s; + +// IR-LABEL: define {{.*}}void @foo1( +// IR: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[CAP_EXPR:%.+]] = alloca i32, align 4 +// IR-NEXT: [[CAP_EXPR1:%.+]] = alloca i32, align 4 +// IR-NEXT: [[CAP_EXPR2:%.+]] = alloca i32, align 4 +// IR-NEXT: [[CAP_EXPR3:%.+]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[TMP0:%.+]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP0]], i32* [[CAP_EXPR]], align 4 +// IR-NEXT: [[TMP1:%.+]] = load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP1]], i32* [[CAP_EXPR1]], align 4 +// IR-NEXT: [[TMP2:%.+]] = load i32, i32* [[STEP_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP2]], i32* [[CAP_EXPR2]], align 4 +// IR-NEXT: [[TMP3:%.+]] = load i32, i32* [[CAP_EXPR1]], align 4 +// IR-NEXT: [[TMP4:%.+]] = load i32, i32* [[CAP_EXPR]], 
align 4 +// IR-NEXT: [[SUB:%.+]] = sub i32 [[TMP3]], [[TMP4]] +// IR-NEXT: [[SUB4:%.+]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[TMP5:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4 +// IR-NEXT: [[ADD:%.+]] = add i32 [[SUB4]], [[TMP5]] +// IR-NEXT: [[TMP6:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4 +// IR-NEXT: [[DIV:%.+]] = udiv i32 [[ADD]], [[TMP6]] +// IR-NEXT: [[SUB5:%.+]] = sub i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB5]], i32* [[CAP_EXPR3]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND:.*]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP8:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4 +// IR-NEXT: [[ADD3:%.*]] = add i32 [[TMP8]], 1 +// IR-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]] +// IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END25:.*]] +// IR: [[FOR_BODY]]: +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP5]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4:.*]] +// IR: [[FOR_COND4]]: +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP11:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4 +// IR-NEXT: [[ADD10:%.*]] = add i32 [[TMP11]], 1 +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP11]], 5 +// IR-NEXT: [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]] +// IR-NEXT: br i1 [[CMP12]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// IR: [[COND_TRUE]]: +// IR-NEXT: [[TMP13:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4 +// IR-NEXT: [[ADD18:%.*]] = add i32 [[TMP13]], 1 +// IR-NEXT: br label %[[COND_END:.*]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.*]] = phi 
i32 [ [[ADD18]], %[[COND_TRUE]] ], [ [[ADD19]], %[[COND_FALSE]] ] +// IR-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TMP6]], [[COND]] +// IR-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END:.*]] +// IR: [[FOR_BODY21]]: +// IR-NEXT: [[TMP15:%.+]] = load i32, i32* [[CAP_EXPR]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP17:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], [[TMP17]] +// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP15]], [[MUL]] +// IR-NEXT: store i32 [[ADD22]], i32* [[I]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: call void (...) @body(i32 [[TMP21]]) +// IR-NEXT: br label %[[FOR_INC:.*]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4]] +// IR: [[FOR_END]]: +// IR-NEXT: br label %[[FOR_INC23:.*]] +// IR: [[FOR_INC23]]: +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 5 +// IR-NEXT: store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND]] +// IR: [[FOR_END25]]: +// IR-NEXT: ret void +// +extern "C" void foo1(int start, int end, int step) { + int i; +#pragma omp tile sizes(5) + for (i = start; i < end; i += step) + body(i); +} + +// IR-LABEL: define {{.*}}void @foo2( +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: 
[[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4 +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND:.*]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END30:.*]] +// IR: [[FOR_BODY]]: +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND1:.*]] +// IR: [[FOR_COND1]]: +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 4 +// IR-NEXT: br i1 [[CMP2]], label %[[FOR_BODY3:.*]], label %[[FOR_END27:.*]] +// IR: [[FOR_BODY3]]: +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP2]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4:.*]] +// IR: [[FOR_COND4]]: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 5 +// IR-NEXT: [[CMP5:%.*]] = icmp slt i32 4, [[ADD]] +// IR-NEXT: br i1 [[CMP5]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// IR: [[COND_TRUE]]: +// IR-NEXT: br label %[[COND_END:.*]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 5 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD6]], %[[COND_FALSE]] ] +// IR-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP3]], [[COND]] +// IR-NEXT: br i1 [[CMP7]], 
label %[[FOR_BODY8:.*]], label %[[FOR_END24:.*]] +// IR: [[FOR_BODY8]]: +// IR-NEXT: [[TMP6:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL:%.+]] = mul nsw i32 [[TMP6]], 3 +// IR-NEXT: [[ADD9:%.+]] = add nsw i32 7, [[MUL]] +// IR-NEXT: store i32 [[ADD9]], i32* [[I]], align 4 +// IR-NEXT: [[TMP7:%.+]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: store i32 [[TMP7]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND10:.+]] +// IR: [[FOR_COND10]]: +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP8]], 5 +// IR-NEXT: [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]] +// IR-NEXT: br i1 [[CMP11]], label %[[COND_TRUE12:.*]], label %[[COND_FALSE13:.*]] +// IR: [[COND_TRUE12]]: +// IR-NEXT: br label %[[COND_END15:.*]] +// IR: [[COND_FALSE13]]: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 5 +// IR-NEXT: br label %[[COND_END15]] +// IR: [[COND_END15]]: +// IR-NEXT: [[COND16:%.*]] = phi i32 [ 4, %[[COND_TRUE12]] ], [ [[ADD14]], %[[COND_FALSE13]] ] +// IR-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP7]], [[COND16]] +// IR-NEXT: br i1 [[CMP17]], label %[[FOR_BODY18:.*]], label %[[FOR_END:.*]] +// IR: [[FOR_BODY18]]: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[MUL20:%.*]] = mul nsw i32 [[TMP11]], 3 +// IR-NEXT: [[ADD21:%.*]] = add nsw i32 7, [[MUL20]] +// IR-NEXT: store i32 [[ADD21]], i32* [[J]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[J]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP12]], i32 [[TMP13]]) +// IR-NEXT: br label %[[FOR_INC:.*]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND10]] +// IR: [[FOR_END]]: +// IR-NEXT: br label %[[FOR_INC22:.*]] +// IR: [[FOR_INC22]]: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC23:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[INC23]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4]] +// IR: [[FOR_END24]]: +// IR-NEXT: br label %[[FOR_INC25:.*]] +// IR: [[FOR_INC25]]: +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: store i32 [[ADD26]], i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND1]] +// IR: [[FOR_END27]]: +// IR-NEXT: br label %[[FOR_INC28:.*]] +// IR: [[FOR_INC28]]: +// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP17]], 5 +// IR-NEXT: store i32 [[ADD29]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND]] +// IR: [[FOR_END30]]: +// IR-NEXT: ret void +// +extern "C" void foo2(int start, int end, int step) { +#pragma omp tile sizes(5,5) + for (int i = 7; i < 17; i+=3) + for (int j = 7; j < 17; j+=3) + body(i,j); +} + +// IR-LABEL: @foo3( +// IR-NEXT: entry: +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// 
IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2:@.*]]) +// IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1:@.*]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 0 +// IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// IR: [[COND_TRUE]]: +// IR-NEXT: br label %[[COND_END:.*]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP2]], %[[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// IR: [[OMP_INNER_FOR_COND]]: +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// IR-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] +// IR: [[OMP_INNER_FOR_BODY]]: +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 5 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], i32* 
[[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND:.*]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP7]], 4 +// IR-NEXT: br i1 [[CMP3]], label %[[FOR_BODY:.*]], label %[[FOR_END33:.*]] +// IR: [[FOR_BODY]]: +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4:.*]] +// IR: [[FOR_COND4]]: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 5 +// IR-NEXT: [[CMP6:%.*]] = icmp slt i32 4, [[ADD5]] +// IR-NEXT: br i1 [[CMP6]], label %[[COND_TRUE7:.*]], label %[[COND_FALSE8:.*]] +// IR: [[COND_TRUE7]]: +// IR-NEXT: br label %[[COND_END10:.*]] +// IR: [[COND_FALSE8]]: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], 5 +// IR-NEXT: br label %[[COND_END10]] +// IR: [[COND_END10]]: +// IR-NEXT: [[COND11:%.*]] = phi i32 [ 4, %[[COND_TRUE7]] ], [ [[ADD9]], %[[COND_FALSE8]] ] +// IR-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP9]], [[COND11]] +// IR-NEXT: br i1 [[CMP12]], label %[[FOR_BODY13:.*]], label %[[FOR_END30:.*]] +// IR: [[FOR_BODY13]]: +// IR-NEXT: [[TMP12:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL13:%.+]] = mul nsw i32 [[TMP12]], 3 +// IR-NEXT: [[ADD14:%.+]] = add nsw i32 7, [[MUL13]] +// IR-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: store i32 [[TMP12]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND14:.*]] +// IR: [[FOR_COND14]]: +// IR-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP14]], 5 +// IR-NEXT: [[CMP16:%.*]] = icmp slt i32 4, [[ADD15]] +// IR-NEXT: br i1 [[CMP16]], label %[[COND_TRUE17:.*]], label %[[COND_FALSE18:.*]] +// IR: [[COND_TRUE17]]: +// IR-NEXT: br label %[[COND_END20:.*]] +// IR: [[COND_FALSE18]]: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP15]], 5 +// IR-NEXT: br label %[[COND_END20]] +// IR: [[COND_END20]]: +// IR-NEXT: [[COND21:%.*]] = phi i32 [ 4, %[[COND_TRUE17]] ], [ [[ADD19]], %[[COND_FALSE18]] ] +// IR-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP13]], [[COND21]] +// IR-NEXT: br i1 [[CMP22]], label %[[FOR_BODY23:.*]], label %[[FOR_END:.*]] +// IR: [[FOR_BODY23]]: +// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[MUL26:%.*]] = mul nsw i32 [[TMP17]], 3 +// IR-NEXT: [[ADD27:%.*]] = add nsw i32 7, [[MUL26]] +// IR-NEXT: store i32 [[ADD27]], i32* [[J]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[J]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP18]], i32 [[TMP19]]) +// IR-NEXT: br label %[[FOR_INC:.*]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND14]] +// IR: [[FOR_END]]: +// IR-NEXT: br label %[[FOR_INC28:.*]] +// IR: [[FOR_INC28]]: +// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC29:%.*]] = add nsw i32 [[TMP21]], 1 +// IR-NEXT: store i32 [[INC29]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4]] +// IR: [[FOR_END30]]: +// IR-NEXT: br label %[[FOR_INC31:.*]] +// IR: [[FOR_INC31]]: +// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP22]], 5 +// IR-NEXT: store i32 [[ADD32]], i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND]] +// IR: [[FOR_END33]]: +// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// IR: [[OMP_BODY_CONTINUE]]: +// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// IR: [[OMP_INNER_FOR_INC]]: +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP23]], 1 +// IR-NEXT: store i32 [[ADD34]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] +// IR: [[OMP_INNER_FOR_END]]: +// IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]] +// IR: [[OMP_LOOP_EXIT]]: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]]) +// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB3:@.*]], i32 [[TMP0]]) +// IR-NEXT: ret void +// +extern "C" void foo3() { +#pragma omp for +#pragma omp tile sizes(5,5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + +// IR-LABEL: @foo4( +// IR-NEXT: entry: +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP1:%.*]] = alloca 
i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[K:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2]]) +// IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// IR: [[COND_TRUE]]: +// IR-NEXT: br label %[[COND_END:.*]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 3, %[[COND_TRUE]] ], [ [[TMP2]], %[[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// IR: [[OMP_INNER_FOR_COND]]: +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// IR-NEXT: br i1 [[CMP3]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] +// IR: [[OMP_INNER_FOR_BODY]]: +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 1 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// IR-NEXT: store i32 [[ADD]], i32* [[K]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP8]], 1 +// IR-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 1 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]] +// IR-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 5 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// IR-NEXT: store i32 [[ADD7]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND:.*]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP9]], 4 +// IR-NEXT: br i1 [[CMP8]], label %[[FOR_BODY:.*]], label %[[FOR_END38:.*]] +// IR: [[FOR_BODY]]: +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP10]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND9:.*]] +// IR: [[FOR_COND9]]: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 5 +// IR-NEXT: [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]] +// IR-NEXT: br i1 [[CMP11]], label %[[COND_TRUE12:.*]], label %[[COND_FALSE13:.*]] +// IR: [[COND_TRUE12]]: +// IR-NEXT: br label %[[COND_END15:.*]] +// IR: 
[[COND_FALSE13]]: +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP13]], 5 +// IR-NEXT: br label %[[COND_END15]] +// IR: [[COND_END15]]: +// IR-NEXT: [[COND16:%.*]] = phi i32 [ 4, %[[COND_TRUE12]] ], [ [[ADD14]], %[[COND_FALSE13]] ] +// IR-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP11]], [[COND16]] +// IR-NEXT: br i1 [[CMP17]], label %[[FOR_BODY18:.*]], label %[[FOR_END35:.*]] +// IR: [[FOR_BODY18]]: +// IR-NEXT: [[TMP14:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL18:%.+]] = mul nsw i32 [[TMP14]], 3 +// IR-NEXT: [[ADD19:%.+]] = add nsw i32 7, [[MUL18]] +// IR-NEXT: store i32 [[ADD19]], i32* [[I]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: store i32 [[TMP14]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND19:.*]] +// IR: [[FOR_COND19]]: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: [[CMP21:%.*]] = icmp slt i32 4, [[ADD20]] +// IR-NEXT: br i1 [[CMP21]], label %[[COND_TRUE22:.*]], label %[[COND_FALSE23:.*]] +// IR: [[COND_TRUE22]]: +// IR-NEXT: br label %[[COND_END25:.*]] +// IR: [[COND_FALSE23]]: +// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP17]], 5 +// IR-NEXT: br label %[[COND_END25]] +// IR: [[COND_END25]]: +// IR-NEXT: [[COND26:%.*]] = phi i32 [ 4, %[[COND_TRUE22]] ], [ [[ADD24]], %[[COND_FALSE23]] ] +// IR-NEXT: [[CMP27:%.*]] = icmp slt i32 [[TMP15]], [[COND26]] +// IR-NEXT: br i1 [[CMP27]], label %[[FOR_BODY28:.*]], label %[[FOR_END:.*]] +// IR: [[FOR_BODY28]]: +// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[MUL31:%.*]] = mul nsw i32 [[TMP19]], 3 +// IR-NEXT: [[ADD32:%.*]] = add nsw i32 7, [[MUL31]] +// IR-NEXT: store i32 
[[ADD32]], i32* [[J]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[J]], align 4 +// IR-NEXT: call void (...) @body(i32 [[TMP20]], i32 [[TMP21]]) +// IR-NEXT: br label %[[FOR_INC:.*]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND19]] +// IR: [[FOR_END]]: +// IR-NEXT: br label %[[FOR_INC33:.*]] +// IR: [[FOR_INC33]]: +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC34:%.*]] = add nsw i32 [[TMP23]], 1 +// IR-NEXT: store i32 [[INC34]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND9]] +// IR: [[FOR_END35]]: +// IR-NEXT: br label %[[FOR_INC36:.*]] +// IR: [[FOR_INC36]]: +// IR-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP24]], 5 +// IR-NEXT: store i32 [[ADD37]], i32* [[DOTFLOOR_1_IV_J]], align 4 +// IR-NEXT: br label %[[FOR_COND]] +// IR: [[FOR_END38]]: +// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// IR: [[OMP_BODY_CONTINUE]]: +// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// IR: [[OMP_INNER_FOR_INC]]: +// IR-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD39:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] +// IR: [[OMP_INNER_FOR_END]]: +// IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]] +// IR: [[OMP_LOOP_EXIT]]: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]]) +// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB3]], i32 [[TMP0]]) +// IR-NEXT: ret void +// +extern "C" void foo4() { +#pragma omp for collapse(2) + for (int k = 7; k < 17; k += 3) +#pragma omp tile sizes(5,5) + for (int i = 7; i < 17; i += 3) + for 
(int j = 7; j < 17; j += 3) + body(i, j); +} + + +// IR-LABEL: @foo5( +// IR-NEXT: entry: +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I10:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J15:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2]]) +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +// IR-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 4, [[ADD]] +// IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// IR: [[COND_TRUE]]: +// IR-NEXT: br label %[[COND_END:.*]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP]], align 4 +// IR-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 5 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD4]], %[[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// IR-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[ADD7:%.*]] = add i32 [[SUB6]], 1 +// IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD7]], 1 +// IR-NEXT: [[CONV:%.*]] = zext i32 [[DIV]] to i64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i64 1, [[CONV]] +// IR-NEXT: [[MUL8:%.*]] = mul nsw i64 [[MUL]], 4 +// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL8]], 1 +// IR-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: store i32 [[TMP6]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: store i32 7, i32* [[J]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[CMP12]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]] +// IR: [[OMP_PRECOND_THEN]]: +// IR-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// IR-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* [[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: [[CMP16:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP16]], label %[[COND_TRUE17:.*]], label %[[COND_FALSE18:.*]] +// IR: [[COND_TRUE17]]: +// IR-NEXT: [[TMP12:%.*]] 
= load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: br label %[[COND_END19:.*]] +// IR: [[COND_FALSE18]]: +// IR-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: br label %[[COND_END19]] +// IR: [[COND_END19]]: +// IR-NEXT: [[COND20:%.*]] = phi i64 [ [[TMP12]], %[[COND_TRUE17]] ], [ [[TMP13]], %[[COND_FALSE18]] ] +// IR-NEXT: store i64 [[COND20]], i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// IR-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// IR: [[OMP_INNER_FOR_COND]]: +// IR-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// IR-NEXT: [[CMP21:%.*]] = icmp sle i64 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP21]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] +// IR: [[OMP_INNER_FOR_BODY]]: +// IR-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB22:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// IR-NEXT: [[SUB23:%.*]] = sub i32 [[SUB22]], 1 +// IR-NEXT: [[ADD24:%.*]] = add i32 [[SUB23]], 1 +// IR-NEXT: [[DIV25:%.*]] = udiv i32 [[ADD24]], 1 +// IR-NEXT: [[MUL26:%.*]] = mul i32 1, [[DIV25]] +// IR-NEXT: [[MUL27:%.*]] = mul i32 [[MUL26]], 4 +// IR-NEXT: [[CONV28:%.*]] = zext i32 [[MUL27]] to i64 +// IR-NEXT: [[DIV29:%.*]] = sdiv i64 [[TMP17]], [[CONV28]] +// IR-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV29]], 5 +// IR-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] +// IR-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 +// IR-NEXT: store i32 [[CONV32]], i32* [[DOTFLOOR_0_IV_I10]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CONV33:%.*]] = sext i32 [[TMP20]] to i64 +// IR-NEXT: [[TMP21:%.*]] = load i64, i64* 
[[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB34:%.*]] = sub i32 [[TMP23]], [[TMP24]] +// IR-NEXT: [[SUB35:%.*]] = sub i32 [[SUB34]], 1 +// IR-NEXT: [[ADD36:%.*]] = add i32 [[SUB35]], 1 +// IR-NEXT: [[DIV37:%.*]] = udiv i32 [[ADD36]], 1 +// IR-NEXT: [[MUL38:%.*]] = mul i32 1, [[DIV37]] +// IR-NEXT: [[MUL39:%.*]] = mul i32 [[MUL38]], 4 +// IR-NEXT: [[CONV40:%.*]] = zext i32 [[MUL39]] to i64 +// IR-NEXT: [[DIV41:%.*]] = sdiv i64 [[TMP22]], [[CONV40]] +// IR-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB42:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// IR-NEXT: [[SUB43:%.*]] = sub i32 [[SUB42]], 1 +// IR-NEXT: [[ADD44:%.*]] = add i32 [[SUB43]], 1 +// IR-NEXT: [[DIV45:%.*]] = udiv i32 [[ADD44]], 1 +// IR-NEXT: [[MUL46:%.*]] = mul i32 1, [[DIV45]] +// IR-NEXT: [[MUL47:%.*]] = mul i32 [[MUL46]], 4 +// IR-NEXT: [[CONV48:%.*]] = zext i32 [[MUL47]] to i64 +// IR-NEXT: [[MUL49:%.*]] = mul nsw i64 [[DIV41]], [[CONV48]] +// IR-NEXT: [[SUB50:%.*]] = sub nsw i64 [[TMP21]], [[MUL49]] +// IR-NEXT: [[DIV51:%.*]] = sdiv i64 [[SUB50]], 4 +// IR-NEXT: [[MUL52:%.*]] = mul nsw i64 [[DIV51]], 1 +// IR-NEXT: [[ADD53:%.*]] = add nsw i64 [[CONV33]], [[MUL52]] +// IR-NEXT: [[CONV54:%.*]] = trunc i64 [[ADD53]] to i32 +// IR-NEXT: store i32 [[CONV54]], i32* [[DOTTILE_0_IV_I11]], align 4 +// IR-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB55:%.*]] = sub i32 [[TMP29]], [[TMP30]] +// IR-NEXT: [[SUB56:%.*]] = sub i32 [[SUB55]], 1 +// 
IR-NEXT: [[ADD57:%.*]] = add i32 [[SUB56]], 1 +// IR-NEXT: [[DIV58:%.*]] = udiv i32 [[ADD57]], 1 +// IR-NEXT: [[MUL59:%.*]] = mul i32 1, [[DIV58]] +// IR-NEXT: [[MUL60:%.*]] = mul i32 [[MUL59]], 4 +// IR-NEXT: [[CONV61:%.*]] = zext i32 [[MUL60]] to i64 +// IR-NEXT: [[DIV62:%.*]] = sdiv i64 [[TMP28]], [[CONV61]] +// IR-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB63:%.*]] = sub i32 [[TMP31]], [[TMP32]] +// IR-NEXT: [[SUB64:%.*]] = sub i32 [[SUB63]], 1 +// IR-NEXT: [[ADD65:%.*]] = add i32 [[SUB64]], 1 +// IR-NEXT: [[DIV66:%.*]] = udiv i32 [[ADD65]], 1 +// IR-NEXT: [[MUL67:%.*]] = mul i32 1, [[DIV66]] +// IR-NEXT: [[MUL68:%.*]] = mul i32 [[MUL67]], 4 +// IR-NEXT: [[CONV69:%.*]] = zext i32 [[MUL68]] to i64 +// IR-NEXT: [[MUL70:%.*]] = mul nsw i64 [[DIV62]], [[CONV69]] +// IR-NEXT: [[SUB71:%.*]] = sub nsw i64 [[TMP27]], [[MUL70]] +// IR-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB72:%.*]] = sub i32 [[TMP35]], [[TMP36]] +// IR-NEXT: [[SUB73:%.*]] = sub i32 [[SUB72]], 1 +// IR-NEXT: [[ADD74:%.*]] = add i32 [[SUB73]], 1 +// IR-NEXT: [[DIV75:%.*]] = udiv i32 [[ADD74]], 1 +// IR-NEXT: [[MUL76:%.*]] = mul i32 1, [[DIV75]] +// IR-NEXT: [[MUL77:%.*]] = mul i32 [[MUL76]], 4 +// IR-NEXT: [[CONV78:%.*]] = zext i32 [[MUL77]] to i64 +// IR-NEXT: [[DIV79:%.*]] = sdiv i64 [[TMP34]], [[CONV78]] +// IR-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB80:%.*]] = sub i32 [[TMP37]], [[TMP38]] +// IR-NEXT: [[SUB81:%.*]] = sub i32 [[SUB80]], 1 +// IR-NEXT: [[ADD82:%.*]] = add i32 [[SUB81]], 1 +// IR-NEXT: [[DIV83:%.*]] = 
udiv i32 [[ADD82]], 1 +// IR-NEXT: [[MUL84:%.*]] = mul i32 1, [[DIV83]] +// IR-NEXT: [[MUL85:%.*]] = mul i32 [[MUL84]], 4 +// IR-NEXT: [[CONV86:%.*]] = zext i32 [[MUL85]] to i64 +// IR-NEXT: [[MUL87:%.*]] = mul nsw i64 [[DIV79]], [[CONV86]] +// IR-NEXT: [[SUB88:%.*]] = sub nsw i64 [[TMP33]], [[MUL87]] +// IR-NEXT: [[DIV89:%.*]] = sdiv i64 [[SUB88]], 4 +// IR-NEXT: [[MUL90:%.*]] = mul nsw i64 [[DIV89]], 4 +// IR-NEXT: [[SUB91:%.*]] = sub nsw i64 [[SUB71]], [[MUL90]] +// IR-NEXT: [[MUL92:%.*]] = mul nsw i64 [[SUB91]], 3 +// IR-NEXT: [[ADD93:%.*]] = add nsw i64 7, [[MUL92]] +// IR-NEXT: [[CONV94:%.*]] = trunc i64 [[ADD93]] to i32 +// IR-NEXT: store i32 [[CONV94]], i32* [[J15]], align 4 +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTTILE_0_IV_I11]], align 4 +// IR-NEXT: [[MUL95:%.*]] = mul nsw i32 [[TMP39]], 3 +// IR-NEXT: [[ADD96:%.*]] = add nsw i32 7, [[MUL95]] +// IR-NEXT: store i32 [[ADD96]], i32* [[I]], align 4 +// IR-NEXT: [[TMP40:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: [[TMP41:%.*]] = load i32, i32* [[J15]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP40]], i32 [[TMP41]]) +// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// IR: [[OMP_BODY_CONTINUE]]: +// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// IR: [[OMP_INNER_FOR_INC]]: +// IR-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: [[ADD97:%.*]] = add nsw i64 [[TMP42]], 1 +// IR-NEXT: store i64 [[ADD97]], i64* [[DOTOMP_IV]], align 8 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] +// IR: [[OMP_INNER_FOR_END]]: +// IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]] +// IR: [[OMP_LOOP_EXIT]]: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]]) +// IR-NEXT: br label %[[OMP_PRECOND_END]] +// IR: [[OMP_PRECOND_END]]: +// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB3]], i32 [[TMP0]]) +// IR-NEXT: ret void +// +extern "C" void foo5() { +#pragma omp for collapse(3) +#pragma omp tile sizes(5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +// IR-LABEL: @foo6( +// IR-NEXT: entry: +// IR-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. 
to void (i32*, i32*, ...)*)) +// IR-NEXT: ret void +// +// IR-LABEL: @.omp_outlined.( +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// IR: [[COND_TRUE]]: +// IR-NEXT: br label %[[COND_END:.*]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], i32* 
[[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// IR: [[OMP_INNER_FOR_COND]]: +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// IR-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] +// IR: [[OMP_INNER_FOR_BODY]]: +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 7, i32* [[I]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND:.*]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 5 +// IR-NEXT: [[CMP4:%.*]] = icmp slt i32 4, [[ADD3]] +// IR-NEXT: br i1 [[CMP4]], label %[[COND_TRUE5:.*]], label %[[COND_FALSE6:.*]] +// IR: [[COND_TRUE5]]: +// IR-NEXT: br label %[[COND_END8:.*]] +// IR: [[COND_FALSE6]]: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 5 +// IR-NEXT: br label %[[COND_END8]] +// IR: [[COND_END8]]: +// IR-NEXT: [[COND9:%.*]] = phi i32 [ 4, %[[COND_TRUE5]] ], [ [[ADD7]], %[[COND_FALSE6]] ] +// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP9]], [[COND9]] +// IR-NEXT: br i1 [[CMP10]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// IR: [[FOR_BODY]]: +// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL11:%.*]] = mul nsw i32 
[[TMP12]], 3 +// IR-NEXT: [[ADD12:%.*]] = add nsw i32 7, [[MUL11]] +// IR-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: call void (...) @body(i32 [[TMP13]]) +// IR-NEXT: br label %[[FOR_INC:.*]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND]] +// IR: [[FOR_END]]: +// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// IR: [[OMP_BODY_CONTINUE]]: +// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// IR: [[OMP_INNER_FOR_INC]]: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] +// IR: [[OMP_INNER_FOR_END]]: +// IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]] +// IR: [[OMP_LOOP_EXIT]]: +// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP1]]) +// IR-NEXT: ret void +// +extern "C" void foo6() { +#pragma omp parallel for +#pragma omp tile sizes(5) + for (int i = 7; i < 17; i += 3) + body(i); +} + + +template +void foo7(T start, T end) { +#pragma omp tile sizes(Tile) + for (T i = start; i < end; i += Step) + body(i); +} + +// IR-LABEL: define {{.*}}void @tfoo7( +// IR-NEXT: entry: +// IR-NEXT: call void @_Z4foo7IiLi3ELi5EEvT_S0_(i32 0, i32 42) +// IR-NEXT: ret void +// +// IR-LABEL: define linkonce_odr void @_Z4foo7IiLi3ELi5EEvT_S0_( +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[CAPTURE_EXPR:%.+]] = alloca i32, align 4 +// IR-NEXT: [[CAPTURE_EXPR1:%.+]] = alloca i32, align 4 +// IR-NEXT: [[CAPTURE_EXPR2:%.+]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca 
i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 +// IR-NEXT: [[TMP0:%.+]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP0]], i32* [[CAPTURE_EXPR]], align 4 +// IR-NEXT: [[TMP1:%.+]] = load i32, i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP1]], i32* [[CAPTURE_EXPR1]], align 4 +// IR-NEXT: [[TMP2:%.+]] = load i32, i32* [[CAPTURE_EXPR1]], align 4 +// IR-NEXT: [[TMP3:%.+]] = load i32, i32* [[CAPTURE_EXPR]], align 4 +// IR-NEXT: [[SUB:%.+]] = sub i32 [[TMP2]], [[TMP3]] +// IR-NEXT: [[SUB3:%.+]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[ADD:%.+]] = add i32 [[SUB3]], 3 +// IR-NEXT: [[DIV:%.+]] = udiv i32 [[ADD]], 3 +// IR-NEXT: [[SUB4:%.+]] = sub i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], i32* [[CAPTURE_EXPR2]], align 4 +// IR-NEXT: [[TMP4:%.+]] = load i32, i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], i32* [[I]], align 4 +// IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND:.*]] +// IR: [[FOR_COND]]: +// IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP6:%.+]] = load i32, i32* [[CAPTURE_EXPR2]], align 4 +// IR-NEXT: [[ADD3:%.*]] = add i32 [[TMP6]], 1 +// IR-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]] +// IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END25:.*]] +// IR: [[FOR_BODY]]: +// IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP3]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4:.*]] +// IR: [[FOR_COND4]]: +// IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[CAPTURE_EXPR2]], align 4 +// IR-NEXT: [[ADD10:%.*]] = add i32 [[TMP5]], 1 +// IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 
[[TMP7]], 5 +// IR-NEXT: [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]] +// IR-NEXT: br i1 [[CMP12]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// IR: [[COND_TRUE]]: +// IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[CAPTURE_EXPR2]], align 4 +// IR-NEXT: [[ADD18:%.*]] = add i32 [[TMP8]], 1 +// IR-NEXT: br label %[[COND_END:.*]] +// IR: [[COND_FALSE]]: +// IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP10]], 5 +// IR-NEXT: br label %[[COND_END]] +// IR: [[COND_END]]: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[ADD18]], %[[COND_TRUE]] ], [ [[ADD19]], %[[COND_FALSE]] ] +// IR-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TMP4]], [[COND]] +// IR-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END:.*]] +// IR: [[FOR_BODY21]]: +// IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[CAPTURE_EXPR]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 3 +// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP11]], [[MUL]] +// IR-NEXT: store i32 [[ADD22]], i32* [[I]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// IR-NEXT: call void (...) 
@body(i32 [[TMP14]]) +// IR-NEXT: br label %[[FOR_INC:.*]] +// IR: [[FOR_INC]]: +// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND4]] +// IR: [[FOR_END]]: +// IR-NEXT: br label %[[FOR_INC23:.*]] +// IR: [[FOR_INC23]]: +// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP16]], 5 +// IR-NEXT: store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND]] +// IR: [[FOR_END25]]: +// IR-NEXT: ret void +// +extern "C" void tfoo7() { + foo7(0, 42); +} + +#endif /* HEADER */ diff --git a/clang/test/OpenMP/tile_messages.cpp b/clang/test/OpenMP/tile_messages.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/tile_messages.cpp @@ -0,0 +1,127 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=51 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+1 {{expected '('}} + #pragma omp tile sizes + ; + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes( + ; + + // expected-error@+1 {{expected expression}} + #pragma omp tile sizes() + ; + + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes(5 + for (int i = 0; i < 7; ++i); + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes(5, + ; + + // expected-error@+1 {{expected expression}} + #pragma omp tile sizes(5,) + ; + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp tile sizes(5+ + ; + + // expected-error@+1 {{expected expression}} + #pragma omp tile sizes(5+) + ; + 
+ // expected-error@+1 {{expected expression}} + #pragma omp tile sizes(for) + ; + + // expected-error@+1 {{argument to 'sizes' clause must be a strictly positive integer value}} + #pragma omp tile sizes(0) + ; + + // expected-error@+4 {{expression is not an integral constant expression}} + // expected-note@+3 {{read of non-const variable 'a' is not allowed in a constant expression}} + // expected-note@+1 {{declared here}} + int a; + #pragma omp tile sizes(a) + ; + + // expected-warning@+2 {{extra tokens at the end of '#pragma omp tile' are ignored}} + // expected-error@+1 {{directive '#pragma omp tile' requires the 'sizes' clause}} + #pragma omp tile foo + ; + + // expected-error@+1 {{directive '#pragma omp tile' cannot contain more than one 'sizes' clause}} + #pragma omp tile sizes(5) sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+1 {{unexpected OpenMP clause 'collapse' in directive '#pragma omp tile'}} + #pragma omp tile sizes(5) collapse(2) + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp tile sizes(5) + } + + // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5) + int b = 0; + + // expected-error@+3 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) { + int k = 3; + for (int j = 0; j < 7; ++j) + ; + } + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = i; j < 7; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = 0; j < i; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp tile sizes(5,5) + for (int i = 0; i < 7; 
++i) + for (int j = 0; j < i; ++j) + ; + + // expected-error@+5 {{expected 3 for loops after '#pragma omp for', but found only 2}} + // expected-note@+1 {{as specified in 'collapse' clause}} + #pragma omp for collapse(3) + #pragma omp tile sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}} + #pragma omp tile sizes(5) + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} + #pragma omp tile sizes(5) + for (int i = 0; i/3<7; ++i) + ; +} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2038,9 +2038,11 @@ void VisitOpaqueValueExpr(const OpaqueValueExpr *E); void VisitLambdaExpr(const LambdaExpr *E); void VisitOMPExecutableDirective(const OMPExecutableDirective *D); + void VisitOMPLoopBasedDirective(const OMPLoopBasedDirective *D); void VisitOMPLoopDirective(const OMPLoopDirective *D); void VisitOMPParallelDirective(const OMPParallelDirective *D); void VisitOMPSimdDirective(const OMPSimdDirective *D); + void VisitOMPTileDirective(const OMPTileDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -2214,6 +2216,11 @@ Visitor->AddStmt(C->getSimdlen()); } +void OMPClauseEnqueue::VisitOMPSizesClause(const OMPSizesClause *C) { + for (auto E : C->getSizesRefs()) + Visitor->AddStmt(E); +} + void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { Visitor->AddStmt(C->getAllocator()); } @@ -2842,10 +2849,15 @@ EnqueueChildren(*I); } -void EnqueueVisitor::VisitOMPLoopDirective(const OMPLoopDirective *D) { +void EnqueueVisitor::VisitOMPLoopBasedDirective( + const OMPLoopBasedDirective *D) { 
VisitOMPExecutableDirective(D); } +void EnqueueVisitor::VisitOMPLoopDirective(const OMPLoopDirective *D) { + VisitOMPLoopBasedDirective(D); +} + void EnqueueVisitor::VisitOMPParallelDirective(const OMPParallelDirective *D) { VisitOMPExecutableDirective(D); } @@ -2854,6 +2866,10 @@ VisitOMPLoopDirective(D); } +void EnqueueVisitor::VisitOMPTileDirective(const OMPTileDirective *D) { + VisitOMPLoopBasedDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -5528,6 +5544,8 @@ return cxstring::createRef("OMPParallelDirective"); case CXCursor_OMPSimdDirective: return cxstring::createRef("OMPSimdDirective"); + case CXCursor_OMPTileDirective: + return cxstring::createRef("OMPTileDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -645,6 +645,9 @@ case Stmt::OMPSimdDirectiveClass: K = CXCursor_OMPSimdDirective; break; + case Stmt::OMPTileDirectiveClass: + K = CXCursor_OMPTileDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -68,6 +68,7 @@ let clangClass = "OMPPrivateClause"; let flangClass = "OmpObjectList"; } +def OMPC_Sizes: Clause<"sizes"> { let clangClass = "OMPSizesClause"; } def OMPC_FirstPrivate : Clause<"firstprivate"> { let clangClass = "OMPFirstprivateClause"; let flangClass = "OmpObjectList"; @@ -375,6 +376,11 @@ VersionedClause, ]; } +def OMP_Tile : Directive<"tile"> { + let allowedOnceClauses = [ + VersionedClause, + ]; +} def OMP_For : Directive<"for"> { let allowedClauses = [ VersionedClause,