diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -46,6 +46,7 @@ class ASTContext; class Attr; class CapturedDecl; +class CapturedStmt; class Decl; class Expr; class AddrLabelExpr; @@ -2495,23 +2496,222 @@ } }; +/// Implementation for holding the children of ForStmt or CXXForRangeStmt. +class LoopChildren final : private llvm::TrailingObjects { + friend class ForStmt; + friend class CXXForRangeStmt; + +private: + unsigned NumChildren; + + size_t numTrailingObjects(OverloadToken) const { return NumChildren; } + static size_t size(unsigned NumChildren) { + return totalSizeToAlloc(NumChildren); + } + + LoopChildren() = delete; + LoopChildren(unsigned NumChildren); + +public: + /// Return this AST node's children. + /// @{ + Stmt::child_range children(); + Stmt::const_child_range children() const; + + MutableArrayRef getChildren() { + return {&getTrailingObjects()[0], NumChildren}; + } + ArrayRef getChildren() const { + return {&getTrailingObjects()[0], NumChildren}; + } + /// @} +}; // class LoopChildren + +/// Superclass for AST nodes that can be OpenMP canonical loops. +/// +/// If the semantic analyzer determines that a ForStmt or CXXForRangeStmt is +/// used in context of an OpenMP loop-associated directive (OMPLoopDirective) +/// and fulfills the requirement of an OpenMP canonical loop, the AST node has +/// three more properties: the distance function, the loop variable function and +/// the loop variable reference. Otherweise, these members are NULL. +/// +/// An OpenMP canonical loop is a for-statement or range-based for-statement +/// with additional requirements that ensure that the number of iterations is +/// known before entering the loop and allow skipping to an arbitrary iteration. +/// The MaybeCanonicalLoopStmt AST node wraps a ForStmt or CXXRangeForStmt that +/// is known for fulfill OpenMP's loop requirements. +/// +/// There are three different kinds of iteration variables for different +/// purposes: +/// * Loop variable: The user-accessible variable with different value for each +/// iteration. +/// * Loop counter: The variable used to identify a loop iterations; for +/// range-based for-statement, this is the hidden iterator '__begin'. For +/// other loops, it is identical to the loop variable. Must be a random-access +/// iterator or integer type. +/// * Logical iteration counter: Normalized loop counter starting at 0 and +/// incrementing by one at each iterations. Allows abstracting over the type +/// of the loop counter and is always an unsigned integer type appropriate to +/// represent the range of the loop counter variable. +/// +/// This AST node provides two captured statements: +/// * The distance function which computes the number of iterations. +/// * The loop variable function that computes the loop variable when given a +/// logical iteration number. +/// +/// These captured statements provide the link between C/C++ semantics and the +/// logical iteration counters used by the OpenMPIRBuilder which is +/// language-agnostic and therefore does not know e.g. how to advance a +/// random-access iterator. The OpenMPIRBuilder will use this information to +/// convert the loop into simd-, workshare-, distribute-, taskloop etc. 
For +/// compatibility with the non-OpenMPIRBuilder codegen path, an +/// MaybeCanonicalLoopStmt can itself also be wrapped into the CapturedStmts of +/// an OMPLoopDirective and skipped when searching for the associated +/// syntactical loop. +/// +/// Example: +/// +/// std::vector Container{1,2,3}; +/// for (std::string Str : Container) +/// Body(Str); +/// +/// which is syntactic sugar for approximately: +/// +/// auto &&__range = Container; +/// auto __begin = std::begin(__range); +/// auto __end = std::end(__range); +/// for (; __begin != __end; ++__begin) { +/// std::String Str = *__begin; +/// Body(Str); +/// } +/// +/// In this example, the loop variable is `Str`, the loop counter is `__begin` +/// of type `std::vector::iterator` and the logical iteration +/// number type is `size_t` (unsigned version of +/// `std::vector::iterator::difference_type` aka `ptrdiff_t`). +/// Therefore, the distance function will be +/// +/// [&](size_t &Result) { Result = __end - __begin; } +/// +/// and the loop variable function is +/// +/// [&,__begin](std::vector::iterator &Result, size_t Logical) { +/// Result = __begin + Logical; } +/// +class MaybeCanonicalLoopStmt + : public Stmt, + private llvm::TrailingObjects { + +protected: + /// Children of this AST node. + enum { DISTANCE_FUNC, LOOPVAR_FUNC, LOOPVAR_REF, LastSubStmt = LOOPVAR_REF }; + static constexpr unsigned SubStmtCount = LastSubStmt + 1; + + LoopChildren *Data; + + MaybeCanonicalLoopStmt() = delete; + MaybeCanonicalLoopStmt(StmtClass SC, LoopChildren *Data) + : Stmt(SC), Data(Data) {} + + MutableArrayRef getChildren() { return Data->getChildren(); } + ArrayRef getChildren() const { return Data->getChildren(); } + + MutableArrayRef getCanonicalChildren() { + MutableArrayRef AllChildren = Data->getChildren(); + return Data->getChildren().slice(AllChildren.size() - SubStmtCount, + SubStmtCount); + } + ArrayRef getCanonicalChildren() const { + ArrayRef AllChildren = Data->getChildren(); + return Data->getChildren().slice(AllChildren.size() - SubStmtCount, + SubStmtCount); + } + +public: + static bool classof(const Stmt *S) { + return StmtClass::firstMaybeCanonicalLoopStmtConstant <= + S->getStmtClass() && + S->getStmtClass() <= StmtClass::lastMaybeCanonicalLoopStmtConstant; + } + +public: + /// Return this AST node's children. + /// @{ + child_range children() { return Data->children(); } + const_child_range children() const { return Data->children(); } + /// @} + + /// The function that computes the number of loop iterations. Can be evaluated + /// before entering the loop but after the syntactical loop's init + /// statement(s). + /// + /// Function signature: void(LogicalTy &Result) + /// Any values necessary to compute the distance are captures of the closure. + /// @{ + CapturedStmt *getDistanceFunc() { + return cast_or_null(getCanonicalChildren()[DISTANCE_FUNC]); + } + const CapturedStmt *getDistanceFunc() const { + return cast_or_null(getCanonicalChildren()[DISTANCE_FUNC]); + } + void setDistanceFunc(Stmt *S); + /// @} + + /// The function that compute the loop variable from a logical iteration + /// counter. Can be evaluated as first statement in the loop. + /// + /// Function signature: void(LoopVarTy &Result, LogicalTy Number) + /// Ayn other values required to compute the loop variable (such as start + /// value, step size) are captured by the closure. In particular, the initial + /// value of loop counter is captured by value to be unaffected by previous + /// iterations. 
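+ ///
+ /// For illustration only (a rough sketch; the actual closure is a
+ /// CapturedStmt built by Sema::ActOnOpenMPCanonicalLoop): for a loop such as
+ ///
+ ///   for (int i = Start; i < Stop; i += Step)
+ ///
+ /// this function behaves approximately like
+ ///
+ ///   [&,i](int &Result, unsigned Logical) { Result = i + Step * Logical; }
+ ///
+ /// where the captured value of `i` is its value before the first iteration.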
+ /// @{ + CapturedStmt *getLoopVarFunc() { + return cast_or_null(getCanonicalChildren()[LOOPVAR_FUNC]); + } + const CapturedStmt *getLoopVarFunc() const { + return cast_or_null(getCanonicalChildren()[LOOPVAR_FUNC]); + } + void setLoopVarFunc(Stmt *S); + /// @} + + /// Reference to the loop variable as accessed in the loop body. + /// @{ + DeclRefExpr *getLoopVarRef() { + return reinterpret_cast(getCanonicalChildren()[LOOPVAR_REF]); + } + const DeclRefExpr *getLoopVarRef() const { + return reinterpret_cast( + getCanonicalChildren()[LOOPVAR_REF]); + } + void setLoopVarRef(Expr *E); + /// @} +}; + /// ForStmt - This represents a 'for (init;cond;inc)' stmt. Note that any of /// the init/cond/inc parts of the ForStmt will be null if they were not /// specified in the source. -class ForStmt : public Stmt { - enum { INIT, CONDVAR, COND, INC, BODY, END_EXPR }; - Stmt* SubExprs[END_EXPR]; // SubExprs[INIT] is an expression or declstmt. +class ForStmt : public MaybeCanonicalLoopStmt { + enum { INIT, CONDVAR, COND, INC, BODY, LastSub = BODY }; + static const unsigned SubCount = LastSub + 1; SourceLocation LParenLoc, RParenLoc; -public: - ForStmt(const ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar, - Expr *Inc, Stmt *Body, SourceLocation FL, SourceLocation LP, - SourceLocation RP); - +private: /// Build an empty for statement. - explicit ForStmt(EmptyShell Empty) : Stmt(ForStmtClass, Empty) {} + explicit ForStmt(LoopChildren *Data) + : MaybeCanonicalLoopStmt(ForStmtClass, Data) {} + +public: + static ForStmt *create(ASTContext &Ctx, Stmt *Init, Expr *Cond, + VarDecl *condVar, Expr *Inc, Stmt *Body, + SourceLocation FL, SourceLocation LP, + SourceLocation RP, + CapturedStmt *DistanceFunc = nullptr, + CapturedStmt *LoopVarFunc = nullptr, + DeclRefExpr *LoopVarRef = nullptr); + static ForStmt *createEmpty(ASTContext &Ctx); - Stmt *getInit() { return SubExprs[INIT]; } + Stmt *getInit() { return getChildren()[INIT]; } /// Retrieve the variable declared in this "for" statement, if any. /// @@ -2527,22 +2727,26 @@ /// If this ForStmt has a condition variable, return the faux DeclStmt /// associated with the creation of that condition variable. 
const DeclStmt *getConditionVariableDeclStmt() const { - return reinterpret_cast(SubExprs[CONDVAR]); + return reinterpret_cast(getChildren()[CONDVAR]); } - Expr *getCond() { return reinterpret_cast(SubExprs[COND]); } - Expr *getInc() { return reinterpret_cast(SubExprs[INC]); } - Stmt *getBody() { return SubExprs[BODY]; } + Expr *getCond() { return reinterpret_cast(getChildren()[COND]); } + Expr *getInc() { return reinterpret_cast(getChildren()[INC]); } + Stmt *getBody() { return getChildren()[BODY]; } - const Stmt *getInit() const { return SubExprs[INIT]; } - const Expr *getCond() const { return reinterpret_cast(SubExprs[COND]);} - const Expr *getInc() const { return reinterpret_cast(SubExprs[INC]); } - const Stmt *getBody() const { return SubExprs[BODY]; } + const Stmt *getInit() const { return getChildren()[INIT]; } + const Expr *getCond() const { + return reinterpret_cast(getChildren()[COND]); + } + const Expr *getInc() const { + return reinterpret_cast(getChildren()[INC]); + } + const Stmt *getBody() const { return getChildren()[BODY]; } - void setInit(Stmt *S) { SubExprs[INIT] = S; } - void setCond(Expr *E) { SubExprs[COND] = reinterpret_cast(E); } - void setInc(Expr *E) { SubExprs[INC] = reinterpret_cast(E); } - void setBody(Stmt *S) { SubExprs[BODY] = S; } + void setInit(Stmt *S) { getChildren()[INIT] = S; } + void setCond(Expr *E) { getChildren()[COND] = reinterpret_cast(E); } + void setInc(Expr *E) { getChildren()[INC] = reinterpret_cast(E); } + void setBody(Stmt *S) { getChildren()[BODY] = S; } SourceLocation getForLoc() const { return ForStmtBits.ForLoc; } void setForLoc(SourceLocation L) { ForStmtBits.ForLoc = L; } @@ -2557,15 +2761,6 @@ static bool classof(const Stmt *T) { return T->getStmtClass() == ForStmtClass; } - - // Iterators - child_range children() { - return child_range(&SubExprs[0], &SubExprs[0]+END_EXPR); - } - - const_child_range children() const { - return const_child_range(&SubExprs[0], &SubExprs[0] + END_EXPR); - } }; /// GotoStmt - This represents a direct goto. diff --git a/clang/include/clang/AST/StmtCXX.h b/clang/include/clang/AST/StmtCXX.h --- a/clang/include/clang/AST/StmtCXX.h +++ b/clang/include/clang/AST/StmtCXX.h @@ -131,72 +131,87 @@ /// This is stored in a partially-desugared form to allow full semantic /// analysis of the constituent components. The original syntactic components /// can be extracted using getLoopVariable and getRangeInit. -class CXXForRangeStmt : public Stmt { +class CXXForRangeStmt : public MaybeCanonicalLoopStmt { + enum { + INIT, + RANGE, + BEGINSTMT, + ENDSTMT, + COND, + INC, + LOOPVAR, + BODY, + LastSub = BODY + }; + static constexpr unsigned SubCount = LastSub + 1; + SourceLocation ForLoc; - enum { INIT, RANGE, BEGINSTMT, ENDSTMT, COND, INC, LOOPVAR, BODY, END }; - // SubExprs[RANGE] is an expression or declstmt. - // SubExprs[COND] and SubExprs[INC] are expressions. 
- Stmt *SubExprs[END]; SourceLocation CoawaitLoc; SourceLocation ColonLoc; SourceLocation RParenLoc; friend class ASTStmtReader; -public: - CXXForRangeStmt(Stmt *InitStmt, DeclStmt *Range, DeclStmt *Begin, - DeclStmt *End, Expr *Cond, Expr *Inc, DeclStmt *LoopVar, - Stmt *Body, SourceLocation FL, SourceLocation CAL, - SourceLocation CL, SourceLocation RPL); - CXXForRangeStmt(EmptyShell Empty) : Stmt(CXXForRangeStmtClass, Empty) { } - Stmt *getInit() { return SubExprs[INIT]; } +private: + CXXForRangeStmt(LoopChildren *Data) + : MaybeCanonicalLoopStmt(CXXForRangeStmtClass, Data) {} + +public: + static CXXForRangeStmt * + create(ASTContext &Ctx, Stmt *InitStmt, DeclStmt *Range, DeclStmt *Begin, + DeclStmt *End, Expr *Cond, Expr *Inc, DeclStmt *LoopVar, Stmt *Body, + SourceLocation FL, SourceLocation CAL, SourceLocation CL, + SourceLocation RPL, CapturedStmt *DistanceFunc = nullptr, + CapturedStmt *LoopVarFunc = nullptr, + DeclRefExpr *LoopVarRef = nullptr); + static CXXForRangeStmt *createEmpty(ASTContext &Ctx); + + Stmt *getInit() { return getChildren()[INIT]; } VarDecl *getLoopVariable(); Expr *getRangeInit(); - const Stmt *getInit() const { return SubExprs[INIT]; } + const Stmt *getInit() const { return getChildren()[INIT]; } const VarDecl *getLoopVariable() const; const Expr *getRangeInit() const; - - DeclStmt *getRangeStmt() { return cast(SubExprs[RANGE]); } + DeclStmt *getRangeStmt() { return cast(getChildren()[RANGE]); } DeclStmt *getBeginStmt() { - return cast_or_null(SubExprs[BEGINSTMT]); + return cast_or_null(getChildren()[BEGINSTMT]); + } + DeclStmt *getEndStmt() { + return cast_or_null(getChildren()[ENDSTMT]); } - DeclStmt *getEndStmt() { return cast_or_null(SubExprs[ENDSTMT]); } - Expr *getCond() { return cast_or_null(SubExprs[COND]); } - Expr *getInc() { return cast_or_null(SubExprs[INC]); } - DeclStmt *getLoopVarStmt() { return cast(SubExprs[LOOPVAR]); } - Stmt *getBody() { return SubExprs[BODY]; } + Expr *getCond() { return cast_or_null(getChildren()[COND]); } + Expr *getInc() { return cast_or_null(getChildren()[INC]); } + DeclStmt *getLoopVarStmt() { return cast(getChildren()[LOOPVAR]); } + Stmt *getBody() { return getChildren()[BODY]; } const DeclStmt *getRangeStmt() const { - return cast(SubExprs[RANGE]); + return cast(getChildren()[RANGE]); } const DeclStmt *getBeginStmt() const { - return cast_or_null(SubExprs[BEGINSTMT]); + return cast_or_null(getChildren()[BEGINSTMT]); } const DeclStmt *getEndStmt() const { - return cast_or_null(SubExprs[ENDSTMT]); + return cast_or_null(getChildren()[ENDSTMT]); } const Expr *getCond() const { - return cast_or_null(SubExprs[COND]); - } - const Expr *getInc() const { - return cast_or_null(SubExprs[INC]); + return cast_or_null(getChildren()[COND]); } + const Expr *getInc() const { return cast_or_null(getChildren()[INC]); } const DeclStmt *getLoopVarStmt() const { - return cast(SubExprs[LOOPVAR]); + return cast(getChildren()[LOOPVAR]); } - const Stmt *getBody() const { return SubExprs[BODY]; } + const Stmt *getBody() const { return getChildren()[BODY]; } - void setInit(Stmt *S) { SubExprs[INIT] = S; } - void setRangeInit(Expr *E) { SubExprs[RANGE] = reinterpret_cast(E); } - void setRangeStmt(Stmt *S) { SubExprs[RANGE] = S; } - void setBeginStmt(Stmt *S) { SubExprs[BEGINSTMT] = S; } - void setEndStmt(Stmt *S) { SubExprs[ENDSTMT] = S; } - void setCond(Expr *E) { SubExprs[COND] = reinterpret_cast(E); } - void setInc(Expr *E) { SubExprs[INC] = reinterpret_cast(E); } - void setLoopVarStmt(Stmt *S) { SubExprs[LOOPVAR] = S; } - void setBody(Stmt 
*S) { SubExprs[BODY] = S; } + void setInit(Stmt *S) { getChildren()[INIT] = S; } + void setRangeStmt(Stmt *S) { getChildren()[RANGE] = S; } + void setBeginStmt(Stmt *S) { getChildren()[BEGINSTMT] = S; } + void setEndStmt(Stmt *S) { getChildren()[ENDSTMT] = S; } + void setCond(Expr *E) { getChildren()[COND] = reinterpret_cast(E); } + void setInc(Expr *E) { getChildren()[INC] = reinterpret_cast(E); } + void setLoopVarStmt(Stmt *S) { getChildren()[LOOPVAR] = S; } + void setBody(Stmt *S) { getChildren()[BODY] = S; } SourceLocation getForLoc() const { return ForLoc; } SourceLocation getCoawaitLoc() const { return CoawaitLoc; } @@ -205,21 +220,12 @@ SourceLocation getBeginLoc() const LLVM_READONLY { return ForLoc; } SourceLocation getEndLoc() const LLVM_READONLY { - return SubExprs[BODY]->getEndLoc(); + return getBody()->getEndLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == CXXForRangeStmtClass; } - - // Iterators - child_range children() { - return child_range(&SubExprs[0], &SubExprs[END]); - } - - const_child_range children() const { - return const_child_range(&SubExprs[0], &SubExprs[END]); - } }; /// Representation of a Microsoft __if_exists or __if_not_exists diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -13,7 +13,6 @@ def SwitchStmt : StmtNode; def WhileStmt : StmtNode; def DoStmt : StmtNode; -def ForStmt : StmtNode; def GotoStmt : StmtNode; def IndirectGotoStmt : StmtNode; def ContinueStmt : StmtNode; @@ -25,6 +24,10 @@ def DefaultStmt : StmtNode; def CapturedStmt : StmtNode; +def MaybeCanonicalLoopStmt : StmtNode; +def ForStmt : StmtNode; +def CXXForRangeStmt : StmtNode; + // Statements that might produce a value (for example, as the last non-null // statement in a GNU statement-expression). def ValueStmt : StmtNode; @@ -48,7 +51,6 @@ // C++ statements def CXXCatchStmt : StmtNode; def CXXTryStmt : StmtNode; -def CXXForRangeStmt : StmtNode; // C++ Coroutines TS statements def CoroutineBodyStmt : StmtNode; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10479,6 +10479,11 @@ /// Initialization of captured region for OpenMP region. void ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope); + + /// Called for syntactical loops (ForStmt for CXXRangeForStmt) associated to + /// an OpenMP loop directive. + StmtResult ActOnOpenMPCanonicalLoop(Stmt *AStmt); + /// End of OpenMP region. /// /// \param S Statement associated with the current OpenMP region. 
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6246,10 +6246,9 @@ if (Err) return std::move(Err); - return new (Importer.getToContext()) ForStmt( - Importer.getToContext(), - ToInit, ToCond, ToConditionVariable, ToInc, ToBody, ToForLoc, ToLParenLoc, - ToRParenLoc); + return ForStmt::create(Importer.getToContext(), ToInit, ToCond, + ToConditionVariable, ToInc, ToBody, ToForLoc, + ToLParenLoc, ToRParenLoc); } ExpectedStmt ASTNodeImporter::VisitGotoStmt(GotoStmt *S) { @@ -6358,9 +6357,10 @@ if (Err) return std::move(Err); - return new (Importer.getToContext()) CXXForRangeStmt( - ToInit, ToRangeStmt, ToBeginStmt, ToEndStmt, ToCond, ToInc, ToLoopVarStmt, - ToBody, ToForLoc, ToCoawaitLoc, ToColonLoc, ToRParenLoc); + return CXXForRangeStmt::create(Importer.getToContext(), ToInit, ToRangeStmt, + ToBeginStmt, ToEndStmt, ToCond, ToInc, + ToLoopVarStmt, ToBody, ToForLoc, ToCoawaitLoc, + ToColonLoc, ToRParenLoc); } ExpectedStmt diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp --- a/clang/lib/AST/Stmt.cpp +++ b/clang/lib/AST/Stmt.cpp @@ -995,36 +995,93 @@ return !getCond()->EvaluateKnownConstInt(Ctx) ? getElse() : getThen(); } -ForStmt::ForStmt(const ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar, - Expr *Inc, Stmt *Body, SourceLocation FL, SourceLocation LP, - SourceLocation RP) - : Stmt(ForStmtClass), LParenLoc(LP), RParenLoc(RP) -{ - SubExprs[INIT] = Init; - setConditionVariable(C, condVar); - SubExprs[COND] = Cond; - SubExprs[INC] = Inc; - SubExprs[BODY] = Body; - ForStmtBits.ForLoc = FL; +LoopChildren::LoopChildren(unsigned NumChildren) : NumChildren(NumChildren) { + for (unsigned i = 0; i < NumChildren; ++i) + getTrailingObjects()[i] = nullptr; +} + +Stmt::child_range LoopChildren::children() { + Stmt **Elts = getTrailingObjects(); + + // For compatibility, hide the canonical loop sub-stmts if not present. + if (!Elts[NumChildren - 1] && !Elts[NumChildren - 2] && + !Elts[NumChildren - 3]) + return Stmt::child_range(&Elts[0], &Elts[NumChildren - 3]); + + return Stmt::child_range(&Elts[0], &Elts[NumChildren]); +} + +Stmt::const_child_range LoopChildren::children() const { + Stmt *const *Elts = getTrailingObjects(); + + // For compatibility, hide the canonical loop sub-stmts if not present. 
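+ // (The canonical loop sub-stmts occupy the last three trailing slots:
+ // distance function, loop variable function and loop variable reference.
+ // They are only non-null if Sema recognized this loop as an OpenMP canonical
+ // loop, so hiding the null slots keeps existing AST consumers unaffected.)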
+ if (!Elts[NumChildren - 1] && !Elts[NumChildren - 2] && + !Elts[NumChildren - 3]) + return Stmt::const_child_range(&Elts[0], &Elts[NumChildren - 3]); + + return Stmt::const_child_range(&Elts[0], &Elts[NumChildren]); +} + +void MaybeCanonicalLoopStmt::setDistanceFunc(Stmt *S) { + assert(!S || isa(S)); + getCanonicalChildren()[DISTANCE_FUNC] = S; +} + +void MaybeCanonicalLoopStmt::setLoopVarFunc(Stmt *S) { + assert(!S || isa(S)); + getCanonicalChildren()[LOOPVAR_FUNC] = S; +} + +void MaybeCanonicalLoopStmt::setLoopVarRef(Expr *E) { + assert(!E || isa(E)); + getCanonicalChildren()[LOOPVAR_REF] = E; +} + +ForStmt *ForStmt::create(ASTContext &Ctx, Stmt *Init, Expr *Cond, + VarDecl *condVar, Expr *Inc, Stmt *Body, + SourceLocation FL, SourceLocation LP, + SourceLocation RP, CapturedStmt *DistanceFunc, + CapturedStmt *LoopVarFunc, DeclRefExpr *LoopVarRef) { + ForStmt *Result = createEmpty(Ctx); + Result->setInit(Init); + Result->setConditionVariable(Ctx, condVar); + Result->setCond(Cond); + Result->setInc(Inc); + Result->setBody(Body); + Result->setForLoc(FL); + Result->setLParenLoc(LP); + Result->setRParenLoc(RP); + Result->setDistanceFunc(DistanceFunc); + Result->setLoopVarFunc(LoopVarFunc); + Result->setLoopVarRef(LoopVarRef); + return Result; +} + +ForStmt *ForStmt::createEmpty(ASTContext &Ctx) { + unsigned NumChildren = MaybeCanonicalLoopStmt::SubStmtCount + SubCount; + void *Mem = Ctx.Allocate(sizeof(ForStmt) + LoopChildren::size(NumChildren)); + void *DataMem = (char *)Mem + sizeof(ForStmt); + auto *Data = new (DataMem) LoopChildren(NumChildren); + return new (Mem) ForStmt(Data); } VarDecl *ForStmt::getConditionVariable() const { - if (!SubExprs[CONDVAR]) + if (!getConditionVariableDeclStmt()) return nullptr; - auto *DS = cast(SubExprs[CONDVAR]); + auto *DS = cast(getChildren()[CONDVAR]); return cast(DS->getSingleDecl()); } void ForStmt::setConditionVariable(const ASTContext &C, VarDecl *V) { if (!V) { - SubExprs[CONDVAR] = nullptr; + getChildren()[CONDVAR] = nullptr; return; } SourceRange VarRange = V->getSourceRange(); - SubExprs[CONDVAR] = new (C) DeclStmt(DeclGroupRef(V), VarRange.getBegin(), - VarRange.getEnd()); + getChildren()[CONDVAR] = + new (C) DeclStmt(DeclGroupRef(V), VarRange.getBegin(), VarRange.getEnd()); } SwitchStmt::SwitchStmt(const ASTContext &Ctx, Stmt *Init, VarDecl *Var, @@ -1266,13 +1323,6 @@ break; case VCK_ByCopy: assert(Var && "capturing by copy must have a variable!"); - assert( - (Var->getType()->isScalarType() || (Var->getType()->isReferenceType() && - Var->getType() - ->castAs() - ->getPointeeType() - ->isScalarType())) && - "captures by copy are expected to have a scalar type!"); break; case VCK_VLAType: assert(!Var && diff --git a/clang/lib/AST/StmtCXX.cpp b/clang/lib/AST/StmtCXX.cpp --- a/clang/lib/AST/StmtCXX.cpp +++ b/clang/lib/AST/StmtCXX.cpp @@ -44,22 +44,39 @@ std::copy(handlers.begin(), handlers.end(), Stmts + 1); } -CXXForRangeStmt::CXXForRangeStmt(Stmt *Init, DeclStmt *Range, - DeclStmt *BeginStmt, DeclStmt *EndStmt, - Expr *Cond, Expr *Inc, DeclStmt *LoopVar, - Stmt *Body, SourceLocation FL, - SourceLocation CAL, SourceLocation CL, - SourceLocation RPL) - : Stmt(CXXForRangeStmtClass), ForLoc(FL), CoawaitLoc(CAL), ColonLoc(CL), - RParenLoc(RPL) { - SubExprs[INIT] = Init; - SubExprs[RANGE] = Range; - SubExprs[BEGINSTMT] = BeginStmt; - SubExprs[ENDSTMT] = EndStmt; - SubExprs[COND] = Cond; - SubExprs[INC] = Inc; - SubExprs[LOOPVAR] = LoopVar; - SubExprs[BODY] = Body; +CXXForRangeStmt * +CXXForRangeStmt::create(ASTContext &Ctx, Stmt *InitStmt, 
DeclStmt *Range, + DeclStmt *Begin, DeclStmt *End, Expr *Cond, Expr *Inc, + DeclStmt *LoopVar, Stmt *Body, SourceLocation FL, + SourceLocation CAL, SourceLocation CL, + SourceLocation RPL, CapturedStmt *DistanceFunc, + CapturedStmt *LoopVarFunc, DeclRefExpr *LoopVarRef) { + CXXForRangeStmt *Result = createEmpty(Ctx); + Result->setInit(InitStmt); + Result->setRangeStmt(Range); + Result->setBeginStmt(Begin); + Result->setEndStmt(End); + Result->setCond(Cond); + Result->setInc(Inc); + Result->setLoopVarStmt(LoopVar); + Result->setBody(Body); + Result->ForLoc = FL; + Result->CoawaitLoc = CAL; + Result->ColonLoc = CL; + Result->RParenLoc = RPL; + Result->setDistanceFunc(DistanceFunc); + Result->setLoopVarFunc(LoopVarFunc); + Result->setLoopVarRef(LoopVarRef); + return Result; +} + +CXXForRangeStmt *CXXForRangeStmt::createEmpty(ASTContext &Ctx) { + unsigned NumChildren = MaybeCanonicalLoopStmt::SubStmtCount + SubCount; + void *Mem = + Ctx.Allocate(sizeof(CXXForRangeStmt) + LoopChildren::size(NumChildren)); + void *DataMem = (char *)Mem + sizeof(CXXForRangeStmt); + auto *Data = new (DataMem) LoopChildren(NumChildren); + return new (Mem) CXXForRangeStmt(Data); } Expr *CXXForRangeStmt::getRangeInit() { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -928,6 +928,10 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, ArrayRef ForAttrs) { + // If applicable, emit an OpenMP canonical loop instead. + if (getLangOpts().OpenMPIRBuilder && S.getDistanceFunc()) + return EmitOMPCanonicalLoop(&S); + JumpDest LoopExit = getJumpDestInCurrentScope("for.end"); LexicalScope ForScope(*this, S.getSourceRange()); @@ -1042,6 +1046,10 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, ArrayRef ForAttrs) { + // If applicable, emit an OpenMP canonical loop instead. + if (getLangOpts().OpenMPIRBuilder && S.getDistanceFunc()) + return EmitOMPCanonicalLoop(&S); + JumpDest LoopExit = getJumpDestInCurrentScope("for.end"); LexicalScope ForScope(*this, S.getSourceRange()); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1858,6 +1858,115 @@ BreakContinueStack.pop_back(); } +using EmittedClosureTy = std::pair; + +/// Emit a captured statement and return the function as well as its captured +/// closure context. +static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, + const CapturedStmt *S) { + LValue CapStruct = ParentCGF.InitCapturedStruct(*S); + CodeGenFunction CGF(ParentCGF.CGM, true); + std::unique_ptr CSI = + std::make_unique(*S); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); + llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S); + + return {F, CapStruct.getPointer(ParentCGF)}; +} + +/// Emit a call to a previously captured closure. +static llvm::CallInst * +emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, + llvm::ArrayRef Args) { + // Append the closure context to the argument. 
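+ // The distance/loop-variable captured regions declare the capture record
+ // (holding all implicit captures) as their last parameter, so it is passed
+ // as the trailing argument here.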
+ SmallVector EffectiveArgs; + EffectiveArgs.reserve(Args.size() + 1); + llvm::append_range(EffectiveArgs, Args); + EffectiveArgs.push_back(Cap.second); + + return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); +} + +llvm::CanonicalLoopInfo * +CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { + assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); + + EmitStmt(S); + + assert(OMPLoopNestStack.size() >= Depth && "Found too few loops"); + return OMPLoopNestStack.front(); +} + +void CodeGenFunction::EmitOMPCanonicalLoop(const MaybeCanonicalLoopStmt *S) { + assert(S->getDistanceFunc()); + assert(S->getLoopVarFunc()); + assert(S->getLoopVarRef()); + + const Stmt *SyntacticalLoop = S; + LexicalScope ForScope(*this, S->getSourceRange()); + + // Emit init statements. The Distance/LoopVar funcs may reference variable + // declarations they contain. + const Stmt *BodyStmt; + if (auto *For = dyn_cast(SyntacticalLoop)) { + if (const Stmt *InitStmt = For->getInit()) + EmitStmt(InitStmt); + BodyStmt = For->getBody(); + } else if (auto *RangeFor = dyn_cast(SyntacticalLoop)) { + if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) + EmitStmt(RangeStmt); + if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) + EmitStmt(BeginStmt); + if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) + EmitStmt(EndStmt); + if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) + EmitStmt(LoopVarStmt); + BodyStmt = RangeFor->getBody(); + } else + llvm_unreachable("Expected for-stmt or range-based for-stmt"); + + // Emit closure for later use. By-value captures will be captured here. + const CapturedStmt *DistanceFunc = S->getDistanceFunc(); + EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); + const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); + EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); + + // Call the distance function to get the number of iterations of the loop to + // come. + QualType LogicalTy = DistanceFunc->getCapturedDecl() + ->getParam(0) + ->getType() + .getNonReferenceType(); + Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); + emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); + llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); + + // Emit the loop structure. + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + llvm::CanonicalLoopInfo *CL = + OMPBuilder.createCanonicalLoop(Builder, {}, DistVal); + + // Emit the loop body: Convert the logical iteration number to the loop + // variable and emit the body. + Builder.restoreIP(CL->getBodyIP()); + const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); + LValue LCVal = EmitLValue(LoopVarRef); + Address LoopVarAddress = LCVal.getAddress(*this); + emitCapturedStmtCall(*this, LoopVarClosure, + {LoopVarAddress.getPointer(), CL->getIndVar()}); + { + RunCleanupsScope BodyScope(*this); + EmitStmt(BodyStmt); + } + + // Finish up the loop. + Builder.restoreIP(CL->getAfterIP()); + ForScope.ForceCleanup(); + + // Remember the CanonicalLoopInfo for parent AST nodes consuming it. 
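+ // (EmitOMPCollapsedCanonicalLoopNest reads it back from OMPLoopNestStack
+ // once EmitStmt returns.)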
+ OMPLoopNestStack.push_back(CL); +} + void CodeGenFunction::EmitOMPInnerLoop( const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, @@ -1875,6 +1984,7 @@ const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); const Stmt *SS = ICS->getCapturedStmt(); const AttributedStmt *AS = dyn_cast_or_null(SS); + OMPLoopNestStack.clear(); if (AS) LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), @@ -2424,6 +2534,7 @@ llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); const SourceRange R = S.getSourceRange(); + OMPLoopNestStack.clear(); LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); @@ -2507,6 +2618,7 @@ } EmitBranch(CondBlock); + OMPLoopNestStack.clear(); LoopStack.pop(); // Emit the fall-through block. EmitBlock(LoopExit.getBlock()); @@ -3351,8 +3463,24 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&CodeGen = [this, &S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + // Use the OpenMPIRBuilder if enabled. + if (CGM.getLangOpts().OpenMPIRBuilder) { + // Emit the associated statement and get its loop representation. + const Stmt *Inner = S.getRawStmt(); + llvm::CanonicalLoopInfo *CLI = + EmitOMPCollapsedCanonicalLoopNest(Inner, 1); + + bool NeedsBarrer = !S.getSingleClause(); + llvm::OpenMPIRBuilder &OMPBuilder = + CGM.getOpenMPRuntime().getOMPBuilder(); + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); + OMPBuilder.createStaticWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrer); + return; + } + HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); }; { @@ -3363,9 +3491,11 @@ S.hasCancel()); } - // Emit an implicit barrier at the end. - if (!S.getSingleClause() || HasLastprivates) - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + if (!CGM.getLangOpts().OpenMPIRBuilder) { + // Emit an implicit barrier at the end. + if (!S.getSingleClause() || HasLastprivates) + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + } // Check for outer lastprivate conditional update. checkForLastprivateConditionalUpdate(*this, S); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -50,6 +50,7 @@ class SwitchInst; class Twine; class Value; +class CanonicalLoopInfo; } namespace clang { @@ -276,6 +277,18 @@ // because of jumps. VarBypassDetector Bypasses; + /// List of recently emitted OMPCanonicalLoops. + /// + /// Since OMPCanonicalLoops are nested inside other statements (in particular + /// CapturedStmt generated by OMPExecutableDirective and non-perfectly nested + /// loops), we cannot directly call OMPEmitOMPCanonicalLoop and receive its + /// llvm::CanonicalLoopInfo. Instead, we call EmitStmt and any + /// OMPEmitOMPCanonicalLoop called by it will add its CanonicalLoopInfo to + /// this stack when done. Entering a new loop requires clearing this list; it + /// either means we start parsing an new loop nest or sequential loop that are + /// not nested in each other. 
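+ ///
+ /// For example, EmitOMPForDirective emits its associated loop via
+ /// EmitOMPCollapsedCanonicalLoopNest, which calls EmitStmt on the associated
+ /// statement; the EmitOMPCanonicalLoop reached through it pushes its
+ /// CanonicalLoopInfo onto this stack so the directive can hand it to the
+ /// OpenMPIRBuilder (e.g. createStaticWorkshareLoop).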
+ SmallVector OMPLoopNestStack; + // CodeGen lambda for loops and support for ordered clause typedef llvm::function_ref @@ -3498,6 +3511,18 @@ static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S); + + /// Emit the Stmt \p S and return its topmost canonical loop, if any. + /// TODO: The \p Depth paramter is not yet implemented and must be 1. In the + /// future it is meant to be the number of loops expected in the loop nests + /// (usually specified by the "collapse" clause) that are collapsed to a + /// single loop by this function. + llvm::CanonicalLoopInfo *EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, + int Depth); + + /// Emit an MaybeCanonicalLoopStmt using the OpenMPIRBuilder. + void EmitOMPCanonicalLoop(const MaybeCanonicalLoopStmt *S); + /// Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2529,7 +2529,15 @@ // the captured region. Code elsewhere assumes that any FunctionScopeInfo // should have at least one compound statement scope within it. ParsingOpenMPDirectiveRAII NormalScope(*this, /*Value=*/false); - AssociatedStmt = (Sema::CompoundScopeRAII(Actions), ParseStatement()); + { + Sema::CompoundScopeRAII Scope(Actions); + AssociatedStmt = ParseStatement(); + + if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind) && + getLangOpts().OpenMPIRBuilder) + AssociatedStmt = + Actions.ActOnOpenMPCanonicalLoop(AssociatedStmt.get()); + } AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses); } else if (DKind == OMPD_target_update || DKind == OMPD_target_enter_data || DKind == OMPD_target_exit_data) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -17263,18 +17263,17 @@ /// Capture the given variable in the captured region. -static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI, - VarDecl *Var, - SourceLocation Loc, - const bool BuildAndDiagnose, - QualType &CaptureType, - QualType &DeclRefType, - const bool RefersToCapturedVariable, - Sema &S, bool Invalid) { +static bool captureInCapturedRegion( + CapturedRegionScopeInfo *RSI, VarDecl *Var, SourceLocation Loc, + const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, + const bool RefersToCapturedVariable, Sema::TryCaptureKind Kind, + bool IsTopScope, Sema &S, bool Invalid) { // By default, capture variables by reference. bool ByRef = true; - // Using an LValue reference type is consistent with Lambdas (see below). - if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) { + if (IsTopScope && Kind != Sema::TryCapture_Implicit) { + ByRef = (Kind == Sema::TryCapture_ExplicitByRef); + } else if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) { + // Using an LValue reference type is consistent with Lambdas (see below). 
if (S.isOpenMPCapturedDecl(Var)) { bool HasConst = DeclRefType.isConstQualified(); DeclRefType = DeclRefType.getUnqualifiedType(); @@ -17620,9 +17619,9 @@ DeclRefType, Nested, *this, Invalid); Nested = true; } else if (CapturedRegionScopeInfo *RSI = dyn_cast(CSI)) { - Invalid = !captureInCapturedRegion(RSI, Var, ExprLoc, BuildAndDiagnose, - CaptureType, DeclRefType, Nested, - *this, Invalid); + Invalid = !captureInCapturedRegion( + RSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested, + Kind, /*IsTopScope*/ I == N - 1, *this, Invalid); Nested = true; } else { LambdaScopeInfo *LSI = cast(CSI); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5130,6 +5130,362 @@ } } +namespace { +/// Rewrite statements and expressions for Sema \p Actions CurContext. +/// Used to capture variable references if already parsed statements/expressions +/// into a CapturedStatement. +class CaptureVars : public TreeTransform { + using BaseTransform = TreeTransform; + +public: + CaptureVars(Sema &Actions) : BaseTransform(Actions) {} + + bool AlwaysRebuild() { return true; } +}; +} // namespace + +/// Create a closure that computes the number of iterations of a loop. +/// +/// \param Actions The Sema object. +/// \param LogicalTy Type for the logical iteration number. +/// \param Rel Comparison operator of the loop condition. +/// \param StartExpr Value of the loop counter at the first iteration. +/// \param StopExpr Expression the loop counter is compared against in the loop +/// condition. \param Step Amount of increment after each iteration. +/// +/// \return Closure (CapturedStmt) of the distance calculation. +static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy, + BinaryOperator::Opcode Rel, + Expr *StartExpr, Expr *StopExpr, + Expr *Step) { + ASTContext &Ctx = Actions.getASTContext(); + TypeSourceInfo *LogicalTSI = Ctx.getTrivialTypeSourceInfo(LogicalTy); + + // Captured regions currently don't support return values, we use an + // out-parameter instead. All inputs are implicit captures. + // TODO: Instead of capturing each DeclRefExpr occurring in + // StartExpr/StopExpr/Step, these could also be passed as a value capture. + QualType ResultTy = Ctx.getLValueReferenceType(LogicalTy); + Sema::CapturedParamNameType Params[] = {{"Distance", ResultTy}, + {StringRef(), QualType()}}; + Actions.ActOnCapturedRegionStart({}, nullptr, CR_Default, Params); + + Expr *Body; + { + Sema::CompoundScopeRAII CompoundScope(Actions); + CapturedDecl *CS = cast(Actions.CurContext); + + // Get the LValue expression for the result. + ImplicitParamDecl *DistParam = CS->getParam(0); + DeclRefExpr *DistRef = Actions.BuildDeclRefExpr( + DistParam, LogicalTy, VK_LValue, {}, nullptr, nullptr, {}, nullptr); + + // Capture all referenced variable references. + CaptureVars Recap(Actions); + Expr *NewStart = AssertSuccess(Recap.TransformExpr(StartExpr)); + Expr *NewStop = AssertSuccess(Recap.TransformExpr(StopExpr)); + Expr *NewStep = AssertSuccess(Recap.TransformExpr(Step)); + + IntegerLiteral *Zero = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 0), LogicalTy, {}); + Expr *Dist; + if (Rel == BO_NE) { + // When using a != comparison, the increment can be +1 or -1. This can be + // dynamic at runtime, so we need to check for the direction. + Expr *IsNegStep = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_LT, NewStep, Zero)); + + // Positive increment. 
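+ // Iterations when counting upwards: (Stop - Start) / Step.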
+ Expr *ForwardRange = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStop, NewStart)); + ForwardRange = AssertSuccess( + Actions.BuildCStyleCastExpr({}, LogicalTSI, {}, ForwardRange)); + Expr *ForwardDist = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Div, ForwardRange, NewStep)); + + // Negative increment. + Expr *BackwardRange = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStart, NewStop)); + BackwardRange = AssertSuccess( + Actions.BuildCStyleCastExpr({}, LogicalTSI, {}, BackwardRange)); + Expr *NegIncAmount = + AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, NewStep)); + Expr *BackwardDist = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Div, BackwardRange, NegIncAmount)); + + // Use the appropriate case. + Dist = AssertSuccess(Actions.ActOnConditionalOp( + {}, {}, IsNegStep, BackwardDist, ForwardDist)); + } else { + assert((Rel == BO_LT || Rel == BO_LE || Rel == BO_GE || Rel == BO_GT) && + "Expected one of these relational operators"); + + // We can derive the direction from any other comparison operator. It is + // non well-formed OpenMP if Step increments/decrements in the other + // directions. Whether at least the first iteration passes the loop + // condition. + Expr *HasAnyIteration = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, Rel, NewStart, NewStop)); + + // Compute the range between first and last counter value. + Expr *Range; + if (Rel == BO_GE || Rel == BO_GT) + Range = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStart, NewStop)); + else + Range = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStop, NewStart)); + + // Ensure unsigned range space. + Range = + AssertSuccess(Actions.BuildCStyleCastExpr({}, LogicalTSI, {}, Range)); + + if (Rel == BO_LE || Rel == BO_GE) { + // Add one to the range if the relational operator is inclusive. + Range = + AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range)); + } + + // Divide by the absolute step amount. + if (Rel == BO_GE || Rel == BO_GT) + NewStep = + AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, NewStep)); + Dist = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Div, Range, NewStep)); + + // If there is not at least one iteration, the range contains garbage. Fix + // to zero in this case. + Dist = AssertSuccess( + Actions.ActOnConditionalOp({}, {}, HasAnyIteration, Dist, Zero)); + } + + // Assign the result to the out-parameter. + Body = AssertSuccess(Actions.BuildBinOp(Actions.getCurScope(), {}, + BO_Assign, DistRef, Dist)); + } + return cast( + AssertSuccess(Actions.ActOnCapturedRegionEnd(Body))); +} + +/// Create a closure that computes the loop variable from the logical iteration +/// number. +/// +/// \param Actions The Sema object. +/// \param LoopVarTy Type for the loop variable used for result value. +/// \param LogicalTy Type for the logical iteration number. +/// \param StartExpr Value of the loop counter at the first iteration. +/// \param Step Amount of increment after each iteration. +/// \param Deref Whether the loop variable is a dereference of the loop +/// counter variable. +/// +/// \return Closure (CapturedStmt) of the loop value calculation. +static CapturedStmt *buildLoopVarFunc(Sema &Actions, QualType LoopVarTy, + QualType LogicalTy, + DeclRefExpr *StartExpr, Expr *Step, + bool Deref) { + ASTContext &Ctx = Actions.getASTContext(); + + // Pass the result as an out-parameter. 
Passing as return value would require + // the OpenMPIRBuilder to know additional C/C++ semantics, such as how to + // invoke a copy constructor. + QualType TargetParamTy = Ctx.getLValueReferenceType(LoopVarTy); + Sema::CapturedParamNameType Params[] = {{"LoopVar", TargetParamTy}, + {"Logical", LogicalTy}, + {StringRef(), QualType()}}; + Actions.ActOnCapturedRegionStart({}, nullptr, CR_Default, Params); + + // Capture the initial iterator which represents the LoopVar value at the + // zero's logical iteration. Since the original ForStmt/CXXRangeForStmt update + // it in every iteration, capture it by value before it is modified. + VarDecl *StartVar = cast(StartExpr->getDecl()); + bool Invalid = Actions.tryCaptureVariable(StartVar, {}, + Sema::TryCapture_ExplicitByVal, {}); + (void)Invalid; + assert(!Invalid && "Expecting capture-by-value to work."); + + Expr *Body; + { + Sema::CompoundScopeRAII CompoundScope(Actions); + auto *CS = cast(Actions.CurContext); + + ImplicitParamDecl *TargetParam = CS->getParam(0); + DeclRefExpr *TargetRef = Actions.BuildDeclRefExpr( + TargetParam, LoopVarTy, VK_LValue, {}, nullptr, nullptr, {}, nullptr); + ImplicitParamDecl *IndvarParam = CS->getParam(1); + DeclRefExpr *LogicalRef = Actions.BuildDeclRefExpr( + IndvarParam, LogicalTy, VK_LValue, {}, nullptr, nullptr, {}, nullptr); + + // Capture the Start expression. + CaptureVars Recap(Actions); + Expr *NewStart = AssertSuccess(Recap.TransformExpr(StartExpr)); + Expr *NewStep = AssertSuccess(Recap.TransformExpr(Step)); + + Expr *Skip = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Mul, NewStep, LogicalRef)); + // TODO: Explicitly cast to the iterator's difference_type instead of + // relying on implicit conversion. + Expr *Advanced = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Add, NewStart, Skip)); + + if (Deref) { + // For range-based for-loops convert the loop counter value to a concrete + // loop variable value by dereferencing the iterator. + Advanced = + AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Deref, Advanced)); + } + + // Assign the result to the output parameter. + Body = AssertSuccess(Actions.BuildBinOp(Actions.getCurScope(), {}, + BO_Assign, TargetRef, Advanced)); + } + return cast( + AssertSuccess(Actions.ActOnCapturedRegionEnd(Body))); +} + +StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) { + ASTContext &Ctx = getASTContext(); + + // Extract the common elements of ForStmt and CXXForRangeStmt: + // Loop variable, repeat condition, increment + Expr *Cond, *Inc; + VarDecl *CounterDecl, *LVDecl; + if (auto *For = dyn_cast(AStmt)) { + Stmt *Init = For->getInit(); + if (auto *LCVarDeclStmt = dyn_cast(Init)) { + // For statement declares loop variable. + CounterDecl = cast(LCVarDeclStmt->getSingleDecl()); + } else if (auto *LCAssign = cast(Init)) { + // For statement reuses variable. 
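+ // (e.g. `for (i = init; ...)` where `i` was declared before the loop)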
+ assert(LCAssign->getOpcode() == BO_Assign && + "init part must be a loop variable assignment"); + auto *CounterRef = cast(LCAssign->getLHS()); + CounterDecl = cast(CounterRef->getDecl()); + } else + llvm_unreachable("Cannot determine loop variable"); + LVDecl = CounterDecl; + + Cond = For->getCond(); + Inc = For->getInc(); + } else if (auto *RangeFor = dyn_cast(AStmt)) { + DeclStmt *BeginStmt = RangeFor->getBeginStmt(); + CounterDecl = cast(BeginStmt->getSingleDecl()); + LVDecl = RangeFor->getLoopVariable(); + + Cond = RangeFor->getCond(); + Inc = RangeFor->getInc(); + } else + llvm_unreachable("unhandled kind of loop"); + + QualType CounterTy = CounterDecl->getType(); + QualType LVTy = LVDecl->getType(); + + // Analyze the loop condition. + Expr *LHS, *RHS; + BinaryOperator::Opcode CondRel; + Cond = Cond->IgnoreImplicit(); + if (auto *CondBinExpr = dyn_cast(Cond)) { + LHS = CondBinExpr->getLHS(); + RHS = CondBinExpr->getRHS(); + CondRel = CondBinExpr->getOpcode(); + } else if (auto *CondCXXOp = dyn_cast(Cond)) { + assert(CondCXXOp->getOperator() == OO_ExclaimEqual && + "Expected != loop condition for iterator-based loops"); + assert(CondCXXOp->getNumArgs() == 2 && "Comparison should have 2 operands"); + LHS = CondCXXOp->getArg(0); + RHS = CondCXXOp->getArg(1); + CondRel = BO_NE; + } else + llvm_unreachable("unexpected loop condition"); + + // Normalize such that the loop counter is on the LHS. + if (!isa(LHS->IgnoreImplicit()) || + cast(LHS->IgnoreImplicit())->getDecl() != CounterDecl) { + std::swap(LHS, RHS); + CondRel = BinaryOperator::reverseComparisonOp(CondRel); + } + auto *CounterRef = cast(LHS->IgnoreImplicit()); + + // Decide the bit width for the logical iteration counter. By default use the + // unsigned ptrdiff_t integer size (for iterators and pointers). + // TODO: For iterators, use iterator::difference_type, + // std::iterator_traits<>::difference_type or decltype(it - end). + QualType LogicalTy = Ctx.getUnsignedPointerDiffType(); + if (CounterTy->isIntegerType()) { + unsigned BitWidth = Ctx.getIntWidth(CounterTy); + LogicalTy = Ctx.getIntTypeForBitwidth(BitWidth, false); + } + + // Analyze the loop increment. 
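+ // A ++/-- increment (builtin or overloaded) counts as a step of +1/-1;
+ // += and -= (and their overloaded forms) use their right-hand side,
+ // negated for the decrementing variants.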
+ Expr *Step; + if (auto *IncUn = dyn_cast(Inc)) { + int Direction; + switch (IncUn->getOpcode()) { + case UO_PreInc: + case UO_PostInc: + Direction = 1; + break; + case UO_PreDec: + case UO_PostDec: + Direction = -1; + break; + default: + llvm_unreachable("unhandled unary increment operator"); + } + Step = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), Direction), LogicalTy, {}); + } else if (auto *IncBin = dyn_cast(Inc)) { + if (IncBin->getOpcode() == BO_AddAssign) { + Step = IncBin->getRHS(); + } else if (IncBin->getOpcode() == BO_SubAssign) { + Step = + AssertSuccess(BuildUnaryOp(nullptr, {}, UO_Minus, IncBin->getRHS())); + } else + llvm_unreachable("unhandled binary increment operator"); + } else if (auto *CondCXXOp = dyn_cast(Inc)) { + switch (CondCXXOp->getOperator()) { + case OO_PlusPlus: + Step = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 1), LogicalTy, {}); + break; + case OO_MinusMinus: + Step = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), -1), LogicalTy, {}); + break; + case OO_PlusEqual: + Step = CondCXXOp->getArg(1); + break; + case OO_MinusEqual: + Step = AssertSuccess( + BuildUnaryOp(nullptr, {}, UO_Minus, CondCXXOp->getArg(1))); + break; + default: + llvm_unreachable("unhandled overloaded increment operator"); + } + } else + llvm_unreachable("unknown increment expression"); + + CapturedStmt *DistanceFunc = + buildDistanceFunc(*this, LogicalTy, CondRel, LHS, RHS, Step); + CapturedStmt *LoopVarFunc = buildLoopVarFunc( + *this, LVTy, LogicalTy, CounterRef, Step, isa(AStmt)); + DeclRefExpr *LVRef = BuildDeclRefExpr(LVDecl, LVDecl->getType(), VK_LValue, + {}, nullptr, nullptr, {}, nullptr); + if (auto *For = dyn_cast(AStmt)) { + return ForStmt::create( + Ctx, For->getInit(), For->getCond(), For->getConditionVariable(), + For->getInc(), For->getBody(), For->getForLoc(), For->getLParenLoc(), + For->getRParenLoc(), DistanceFunc, LoopVarFunc, LVRef); + } else if (auto *For = dyn_cast(AStmt)) { + return CXXForRangeStmt::create( + Ctx, For->getInit(), For->getRangeStmt(), For->getBeginStmt(), + For->getEndStmt(), For->getCond(), For->getInc(), For->getLoopVarStmt(), + For->getBody(), For->getForLoc(), For->getCoawaitLoc(), + For->getColonLoc(), For->getRParenLoc(), DistanceFunc, LoopVarFunc, + LVRef); + } else + llvm_unreachable("unsupported"); +} + StmtResult Sema::ActOnOpenMPExecutableDirective( OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName, OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -1862,9 +1862,9 @@ if (isa(Body)) getCurCompoundScope().setHasEmptyLoopBodies(); - return new (Context) - ForStmt(Context, First, Second.get().second, Second.get().first, Third, - Body, ForLoc, LParenLoc, RParenLoc); + return ForStmt::create(Context, First, Second.get().second, + Second.get().first, Third, Body, ForLoc, LParenLoc, + RParenLoc); } /// In an Objective C collection iteration statement: @@ -2730,11 +2730,11 @@ if (getLangOpts().OpenMP >= 50 && BeginDeclStmt.isUsable()) ActOnOpenMPLoopInitialization(ForLoc, BeginDeclStmt.get()); - return new (Context) CXXForRangeStmt( - InitStmt, RangeDS, cast_or_null(BeginDeclStmt.get()), + return CXXForRangeStmt::create( + Context, InitStmt, RangeDS, cast_or_null(BeginDeclStmt.get()), cast_or_null(EndDeclStmt.get()), NotEqExpr.get(), - IncrExpr.get(), LoopVarDS, /*Body=*/nullptr, ForLoc, CoawaitLoc, - ColonLoc, 
RParenLoc); + IncrExpr.get(), LoopVarDS, /*Body=*/nullptr, ForLoc, CoawaitLoc, ColonLoc, + RParenLoc); } /// FinishObjCForCollectionStmt - Attach the body to a objective-C foreach diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -8350,6 +8350,9 @@ else CS = D->getInnermostCapturedStmt()->getCapturedStmt(); Body = getDerived().TransformStmt(CS); + if (Body.isUsable() && isOpenMPLoopDirective(D->getDirectiveKind()) && + getSema().getLangOpts().OpenMPIRBuilder) + Body = getSema().ActOnOpenMPCanonicalLoop(Body.get()); } AssociatedStmt = getDerived().getSema().ActOnOpenMPRegionEnd(Body, TClauses); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -298,6 +298,10 @@ S->setForLoc(readSourceLocation()); S->setLParenLoc(readSourceLocation()); S->setRParenLoc(readSourceLocation()); + + S->setDistanceFunc(Record.readSubStmt()); + S->setLoopVarFunc(Record.readSubStmt()); + S->setLoopVarRef(Record.readSubExpr()); } void ASTStmtReader::VisitGotoStmt(GotoStmt *S) { @@ -1662,6 +1666,10 @@ S->setInc(Record.readSubExpr()); S->setLoopVarStmt(Record.readSubStmt()); S->setBody(Record.readSubStmt()); + + S->setDistanceFunc(Record.readSubStmt()); + S->setLoopVarFunc(Record.readSubStmt()); + S->setLoopVarRef(Record.readSubExpr()); } void ASTStmtReader::VisitMSDependentExistsStmt(MSDependentExistsStmt *S) { @@ -2724,7 +2732,7 @@ break; case STMT_FOR: - S = new (Context) ForStmt(Empty); + S = ForStmt::createEmpty(Context); break; case STMT_GOTO: @@ -3120,7 +3128,7 @@ break; case STMT_CXX_FOR_RANGE: - S = new (Context) CXXForRangeStmt(Empty); + S = CXXForRangeStmt::createEmpty(Context); break; case STMT_MS_DEPENDENT_EXISTS: diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -223,6 +223,11 @@ Record.AddSourceLocation(S->getForLoc()); Record.AddSourceLocation(S->getLParenLoc()); Record.AddSourceLocation(S->getRParenLoc()); + + Record.AddStmt(S->getDistanceFunc()); + Record.AddStmt(S->getLoopVarFunc()); + Record.AddStmt(S->getLoopVarRef()); + Code = serialization::STMT_FOR; } @@ -1544,6 +1549,11 @@ Record.AddStmt(S->getInc()); Record.AddStmt(S->getLoopVarStmt()); Record.AddStmt(S->getBody()); + + Record.AddStmt(S->getDistanceFunc()); + Record.AddStmt(S->getLoopVarFunc()); + Record.AddStmt(S->getLoopVarRef()); + Code = serialization::STMT_CXX_FOR_RANGE; } diff --git a/clang/test/OpenMP/irbuilder_for_iterator.cpp b/clang/test/OpenMP/irbuilder_for_iterator.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/irbuilder_for_iterator.cpp @@ -0,0 +1,147 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +struct MyIterator { + MyIterator(unsigned pos); + MyIterator(const MyIterator &other); + const MyIterator &operator=(const MyIterator &that); + MyIterator &operator++(); + int operator-(const MyIterator &that) const; + MyIterator &operator+=(unsigned a); + MyIterator operator+(unsigned a) const; + bool operator==(const 
MyIterator &that) const; + bool operator!=(const MyIterator &that) const; + unsigned operator*() const; +}; + +extern "C" void workshareloop_iterator(float *a, float *b, float *c) { +#pragma omp for + for (MyIterator it = MyIterator(7); it != MyIterator(41); ++it) { + unsigned i = *it; + a[i] = b[i] * c[i]; + } +} + +#endif // HEADER +// CHECK-LABEL: define {{[^@]+}}@workshareloop_iterator +// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) [[ATTR0:#.*]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[IT:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN10MyIteratorC1Ej(%struct.MyIterator* nonnull dereferenceable(1) [[IT]], i32 7) +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store %struct.MyIterator* [[IT]], %struct.MyIterator** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[IT]]) +// CHECK-NEXT: call void @__captured_stmt(i64* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i64, i64* [[DOTCOUNT_ADDR]], align 8 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i64 0, i64* [[P_LOWERBOUND]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DOTCOUNT]], 1 +// CHECK-NEXT: store i64 [[TMP2]], i64* [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: store i64 1, i64* [[P_STRIDE]], align 8 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[TMP3]] +// CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i64 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP6]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label 
[[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP3]] +// CHECK-NEXT: call void @__captured_stmt.1(%struct.MyIterator* [[IT]], i64 [[TMP7]], %struct.anon.0* [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[IT]]) +// CHECK-NEXT: store i32 [[CALL]], i32* [[I]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP10]], [[TMP13]] +// CHECK-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM4]] +// CHECK-NEXT: store float [[MUL]], float* [[ARRAYIDX5]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i64 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR2:#.*]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1 +// CHECK-NEXT: store i64* [[DISTANCE]], i64** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN10MyIteratorC1Ej(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]], i32 41) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP1]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK10MyIteratormiERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[TMP2]]) +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[CONV]], 1 +// CHECK-NEXT: [[TMP3:%.*]] = load i64*, i64** 
[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i64 [[DIV]], i64* [[TMP3]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (%struct.MyIterator* nonnull align 1 dereferenceable(1) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca %struct.MyIterator*, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1 +// CHECK-NEXT: store %struct.MyIterator* [[LOOPVAR]], %struct.MyIterator** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i64 [[LOGICAL]], i64* [[LOGICAL_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[LOGICAL_ADDR]], align 8 +// CHECK-NEXT: [[MUL:%.*]] = mul i64 1, [[TMP2]] +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[MUL]] to i32 +// CHECK-NEXT: call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]]) +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call nonnull align 1 dereferenceable(1) %struct.MyIterator* @_ZN10MyIteratoraSERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP3]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_for_rangefor.cpp b/clang/test/OpenMP/irbuilder_for_rangefor.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/irbuilder_for_rangefor.cpp @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +struct MyIterator { + MyIterator(unsigned pos); + MyIterator(const MyIterator &other); + const MyIterator &operator=(const MyIterator &that); + MyIterator &operator++(); + int operator-(const MyIterator &that) const; + MyIterator &operator+=(unsigned a); + MyIterator operator+(unsigned a) const; + bool operator==(const MyIterator &that) const; + bool operator!=(const MyIterator &that) const; + unsigned operator*() const; +}; + +struct MyRange { + MyRange(int n); + + MyIterator begin(); + MyIterator end(); +}; + +extern "C" void workshareloop_rangefor(float *a, float *b, float *c) { +#pragma omp for + for (unsigned i : MyRange(42)) { + a[i] = b[i] * c[i]; + } +} + +#endif // HEADER +// CHECK-LABEL: define {{[^@]+}}@workshareloop_rangefor +// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) [[ATTR0:#.*]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[__RANGE2:%.*]] = alloca %struct.MyRange*, align 8 +// CHECK-NEXT: [[REF_TMP:%.*]] = 
alloca [[STRUCT_MYRANGE:%.*]], align 1 +// CHECK-NEXT: [[__BEGIN2:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1 +// CHECK-NEXT: [[__END2:%.*]] = alloca [[STRUCT_MYITERATOR]], align 1 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN7MyRangeC1Ei(%struct.MyRange* nonnull dereferenceable(1) [[REF_TMP]], i32 42) +// CHECK-NEXT: store %struct.MyRange* [[REF_TMP]], %struct.MyRange** [[__RANGE2]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8 +// CHECK-NEXT: call void @_ZN7MyRange5beginEv(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[__BEGIN2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8 +// CHECK-NEXT: call void @_ZN7MyRange3endEv(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[__END2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP1]]) +// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[__BEGIN2]]) +// CHECK-NEXT: store i32 [[CALL]], i32* [[I]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store %struct.MyIterator* [[__BEGIN2]], %struct.MyIterator** [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.MyIterator* [[__END2]], %struct.MyIterator** [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP4]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[__BEGIN2]]) +// CHECK-NEXT: call void @__captured_stmt(i64* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i64, i64* [[DOTCOUNT_ADDR]], align 8 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i64 0, i64* [[P_LOWERBOUND]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[DOTCOUNT]], 1 +// CHECK-NEXT: store i64 [[TMP5]], i64* [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: store i64 1, i64* [[P_STRIDE]], align 8 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], [[TMP6]] +// CHECK-NEXT: [[TMP9:%.*]] = add 
i64 [[TMP8]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i64 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP9]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP6]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I]], i64 [[TMP10]], %struct.anon.0* [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP11:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP13]], [[TMP16]] +// CHECK-NEXT: [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM4]] +// CHECK-NEXT: store float [[MUL]], float* [[ARRAYIDX5]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i64 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR2:#.*]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: store i64* [[DISTANCE]], i64** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP3]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call i32 
@_ZNK10MyIteratormiERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP2]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[TMP4]]) +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[CONV]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = load i64*, i64** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i64 [[DIV]], i64* [[TMP5]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1 +// CHECK-NEXT: store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i64 [[LOGICAL]], i64* [[LOGICAL_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[LOGICAL_ADDR]], align 8 +// CHECK-NEXT: [[MUL:%.*]] = mul i64 1, [[TMP2]] +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[MUL]] to i32 +// CHECK-NEXT: call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]]) +// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]]) +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[CALL]], i32* [[TMP3]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_for_unsigned.c b/clang/test/OpenMP/irbuilder_for_unsigned.c new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/irbuilder_for_unsigned.c @@ -0,0 +1,147 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +extern "C" void workshareloop_unsigned(float *a, float *b, float *c, float *d) { +#pragma omp for + for (unsigned i = 33; i < 32000000; i += 7) { + a[i] = b[i] * c[i] * d[i]; + } +} + +#endif // HEADER +// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned +// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) [[ATTR0:#.*]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, 
align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 33, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I]], i32** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP7]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP4]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I]], i32 [[TMP8]], %struct.anon.0* [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP11]], [[TMP14]] +// CHECK-NEXT: [[TMP15:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 
[[TMP16]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP17]] +// CHECK-NEXT: [[TMP18:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], float* [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR1:#.*]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: store i32* [[DISTANCE]], i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP3]], 32000000 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 32000000, [[TMP6]] +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 7 +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], i32* [[TMP7]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], i32* [[LOGICAL_ADDR]], align 4 +// 
CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 7, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -300,6 +300,12 @@ bool NeedsBarrier, Value *Chunk = nullptr); + /// Modifies the canonical loop to be a workshare loop, distributing its iterations among the threads of the current team. + CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier); + /// Tile a loop nest. /// /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -971,7 +971,8 @@ // Emit the body content. We do it after connecting the loop to the CFG to // avoid that the callback encounters degenerate BBs. - BodyGenCB(CL->getBodyIP(), CL->getIndVar()); + if (BodyGenCB) + BodyGenCB(CL->getBodyIP(), CL->getIndVar()); #ifndef NDEBUG CL->assertOK(); @@ -1164,6 +1165,13 @@ return CLI; } +CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop( + const LocationDescription &Loc, CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, bool NeedsBarrier) { + // Currently only supports static schedules. + return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier); +} + /// Make \p Source branch to \p Target. /// /// Handles two situations: