diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -356,6 +356,9 @@ /// class OMPParallelDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel; @@ -381,6 +384,9 @@ SourceLocation(), NumClauses, 1), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -392,11 +398,14 @@ /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. /// static OMPParallelDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a N clauses. /// @@ -406,6 +415,10 @@ static OMPParallelDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. 
bool hasCancel() const { return HasCancel; } @@ -1258,7 +1271,9 @@ /// class OMPForDirective : public OMPLoopDirective { friend class ASTStmtReader; - + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current directive has inner cancel directive. bool HasCancel; @@ -1286,6 +1301,9 @@ NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1299,13 +1317,15 @@ /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner cancel directive. /// static OMPForDirective *Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, const HelperExprs &Exprs, - bool HasCancel); + Expr *TaskRedRef, bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -1317,6 +1337,10 @@ static OMPForDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -1403,6 +1427,9 @@ class OMPSectionsDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. 
Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current directive has inner cancel directive. bool HasCancel; @@ -1429,6 +1456,9 @@ SourceLocation(), NumClauses, 1), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1440,11 +1470,14 @@ /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner directive. /// static OMPSectionsDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses /// clauses. @@ -1455,6 +1488,10 @@ static OMPSectionsDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -1715,6 +1752,9 @@ class OMPParallelForDirective : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current region has inner cancel directive. 
bool HasCancel; @@ -1743,6 +1783,9 @@ SourceLocation(), CollapsedNum, NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1756,12 +1799,15 @@ /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner cancel directive. /// static OMPParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -1775,6 +1821,10 @@ unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -1863,6 +1913,10 @@ class OMPParallelMasterDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; + OMPParallelMasterDirective(SourceLocation StartLoc, SourceLocation EndLoc, unsigned NumClauses) : OMPExecutableDirective(this, OMPParallelMasterDirectiveClass, @@ -1875,6 +1929,9 @@ SourceLocation(), SourceLocation(), NumClauses, 1) {} + /// Sets special task reduction descriptor. 
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + public: /// Creates directive with a list of \a Clauses. /// @@ -1883,10 +1940,12 @@ /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// static OMPParallelMasterDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef); /// Creates an empty directive with the place for \a NumClauses /// clauses. @@ -1897,6 +1956,10 @@ static OMPParallelMasterDirective * CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + static bool classof(const Stmt *T) { return T->getStmtClass() == OMPParallelMasterDirectiveClass; } @@ -1914,6 +1977,9 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current directive has inner cancel directive. bool HasCancel; @@ -1941,6 +2007,9 @@ 1), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1952,11 +2021,14 @@ /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. 
/// \param HasCancel true if current directive has inner cancel directive. /// static OMPParallelSectionsDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses /// clauses. @@ -1967,6 +2039,10 @@ static OMPParallelSectionsDirective * CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -2805,6 +2881,10 @@ /// class OMPTargetParallelDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; + /// Build directive with the given start and end location. /// /// \param StartLoc Starting location of the directive kind. @@ -2827,6 +2907,9 @@ SourceLocation(), SourceLocation(), NumClauses, /*NumChildren=*/1) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + public: /// Creates directive with a list of \a Clauses. /// @@ -2835,10 +2918,12 @@ /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. 
/// static OMPTargetParallelDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef); /// Creates an empty directive with the place for \a NumClauses /// clauses. @@ -2849,6 +2934,10 @@ static OMPTargetParallelDirective * CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + static bool classof(const Stmt *T) { return T->getStmtClass() == OMPTargetParallelDirectiveClass; } @@ -2866,6 +2955,9 @@ class OMPTargetParallelForDirective : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current region has inner cancel directive. bool HasCancel; @@ -2895,6 +2987,9 @@ SourceLocation(), CollapsedNum, NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -2908,12 +3003,15 @@ /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner cancel directive. 
/// static OMPTargetParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -2927,6 +3025,10 @@ unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -3699,6 +3801,9 @@ /// class OMPDistributeParallelForDirective : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel = false; @@ -3730,6 +3835,9 @@ NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -3743,12 +3851,15 @@ /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. 
/// static OMPDistributeParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -3762,6 +3873,10 @@ unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -4264,6 +4379,9 @@ /// class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel = false; @@ -4296,6 +4414,9 @@ NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -4309,12 +4430,15 @@ /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. 
/// static OMPTeamsDistributeParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses clauses. /// @@ -4326,6 +4450,10 @@ CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -4473,6 +4601,9 @@ class OMPTargetTeamsDistributeParallelForDirective final : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel = false; @@ -4506,6 +4637,9 @@ SourceLocation(), SourceLocation(), CollapsedNum, NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -4519,12 +4653,15 @@ /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. 
/// static OMPTargetTeamsDistributeParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses clauses. /// @@ -4536,6 +4673,10 @@ CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10230,6 +10230,9 @@ def err_omp_non_pointer_type_array_shaping_base : Error< "expected expression with a pointer to a complete type as a base of an array " "shaping operation">; +def err_omp_reduction_task_not_parallel_or_worksharing : Error< + "'reduction' clause with 'task' modifier allowed only on non-simd parallel or" + " worksharing constructs">; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -148,6 +148,7 @@ // Modifiers for 'reduction' clause. 
OPENMP_REDUCTION_MODIFIER(default) OPENMP_REDUCTION_MODIFIER(inscan) +OPENMP_REDUCTION_MODIFIER(task) #undef OPENMP_REDUCTION_MODIFIER #undef OPENMP_DEVICE_MODIFIER diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -161,7 +161,8 @@ OMPParallelDirective *OMPParallelDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPParallelDirective), alignof(OMPClause *)); void *Mem = @@ -170,6 +171,7 @@ new (Mem) OMPParallelDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -227,11 +229,10 @@ return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses); } -OMPForDirective * -OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation EndLoc, unsigned CollapsedNum, - ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { +OMPForDirective *OMPForDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPForDirective), alignof(OMPClause *)); void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + @@ -264,6 +265,7 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -334,7 +336,8 @@ OMPSectionsDirective *OMPSectionsDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation 
EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPSectionsDirective), alignof(OMPClause *)); void *Mem = @@ -343,6 +346,7 @@ new (Mem) OMPSectionsDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -449,7 +453,7 @@ OMPParallelForDirective *OMPParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + @@ -483,6 +487,7 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -552,7 +557,7 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef) { unsigned Size = llvm::alignTo(sizeof(OMPParallelMasterDirective), alignof(OMPClause *)); void *Mem = @@ -561,6 +566,7 @@ new (Mem) OMPParallelMasterDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); return Dir; } @@ -576,7 +582,8 @@ OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel) { + ArrayRef 
Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPParallelSectionsDirective), alignof(OMPClause *)); void *Mem = @@ -585,6 +592,7 @@ new (Mem) OMPParallelSectionsDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -887,7 +895,7 @@ OMPTargetParallelDirective *OMPTargetParallelDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef) { unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelDirective), alignof(OMPClause *)); void *Mem = @@ -896,6 +904,7 @@ new (Mem) OMPTargetParallelDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); return Dir; } @@ -912,7 +921,7 @@ OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate( @@ -946,6 +955,7 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -1456,7 +1466,7 @@ OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr 
*TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate( @@ -1505,6 +1515,7 @@ Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->HasCancel = HasCancel; return Dir; } @@ -1931,7 +1942,7 @@ OMPTeamsDistributeParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { auto Size = llvm::alignTo(sizeof(OMPTeamsDistributeParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate( @@ -1980,6 +1991,7 @@ Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->HasCancel = HasCancel; return Dir; } @@ -2083,7 +2095,7 @@ OMPTargetTeamsDistributeParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { auto Size = llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForDirective), alignof(OMPClause *)); @@ -2134,6 +2146,7 @@ Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->HasCancel = HasCancel; return Dir; } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h 
b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -100,6 +100,7 @@ SmallVector LastprivateVars; SmallVector LastprivateCopies; SmallVector ReductionVars; + SmallVector ReductionOrigs; SmallVector ReductionCopies; SmallVector ReductionOps; struct DependData { @@ -118,6 +119,8 @@ unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; + bool IsReductionWithTaskMod = false; + bool IsWorksharingReduction = false; }; /// Class intended to support codegen of all kind of the reduction clauses. @@ -1418,18 +1421,34 @@ /// should be emitted for reduction: /// \code /// - /// _task_red_item_t red_data[n]; + /// _taskred_item_t red_data[n]; /// ... - /// red_data[i].shar = &origs[i]; + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; /// red_data[i].size = sizeof(origs[i]); /// red_data[i].f_init = (void*)RedInit; /// red_data[i].f_fini = (void*)RedDest; /// red_data[i].f_comb = (void*)RedOp; /// red_data[i].flags = <Flag_i>; /// ... - /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data); /// \endcode + /// For reduction clause with task modifier it emits the next call: + /// \code /// + /// _taskred_item_t red_data[n]; + /// ... + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit; + /// red_data[i].f_fini = (void*)RedDest; + /// red_data[i].f_comb = (void*)RedOp; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n, + /// red_data); + /// \endcode /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. 
/// \param Data Additional data for task generation like tiedness, final @@ -1440,6 +1459,13 @@ ArrayRef RHSExprs, const OMPTaskDataTy &Data); + /// Emits the following code for reduction clause with task modifier: + /// \code + /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing); + /// \endcode + virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, + bool IsWorksharingReduction); + /// Required to resolve existing problems in the runtime. Emits threadprivate /// variables to store the size of the VLAs/array sections for /// initializer/combiner/finalizer functions. @@ -2192,18 +2218,34 @@ /// should be emitted for reduction: /// \code /// - /// _task_red_item_t red_data[n]; + /// _taskred_item_t red_data[n]; /// ... - /// red_data[i].shar = &origs[i]; + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; /// red_data[i].size = sizeof(origs[i]); /// red_data[i].f_init = (void*)RedInit; /// red_data[i].f_fini = (void*)RedDest; /// red_data[i].f_comb = (void*)RedOp; /// red_data[i].flags = <Flag_i>; /// ... - /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data); /// \endcode + /// For reduction clause with task modifier it emits the next call: + /// \code /// + /// _taskred_item_t red_data[n]; + /// ... + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit; + /// red_data[i].f_fini = (void*)RedDest; + /// red_data[i].f_comb = (void*)RedOp; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n, + /// red_data); + /// \endcode /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. 
/// \param Data Additional data for task generation like tiedness, final @@ -2213,6 +2255,13 @@ ArrayRef RHSExprs, const OMPTaskDataTy &Data) override; + /// Emits the following code for reduction clause with task modifier: + /// \code + /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing); + /// \endcode + void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, + bool IsWorksharingReduction) override; + /// Required to resolve existing problems in the runtime. Emits threadprivate /// variables to store the size of the VLAs/array sections for /// initializer/combiner/finalizer functions + emits threadprivate variable to diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -710,6 +710,12 @@ // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); OMPRTL__kmpc_task_reduction_get_th_data, + // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int + // is_ws, int num, void *data); + OMPRTL__kmpc_taskred_modifier_init, + // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, + // int is_ws); + OMPRTL__kmpc_task_reduction_modifier_fini, // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); OMPRTL__kmpc_alloc, // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); @@ -1020,26 +1026,25 @@ bool AsArraySection = isa(ClausesData[N].Ref); if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( - CGF.getTypeSize( - SharedAddresses[N].first.getType().getNonReferenceType()), + CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), nullptr); return; } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = cast( - SharedAddresses[N].first.getPointer(CGF)->getType()) - ->getElementType(); + auto *ElemType = + cast(OrigAddresses[N].first.getPointer(CGF)->getType()) + ->getElementType(); auto 
*ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), - SharedAddresses[N].first.getPointer(CGF)); + Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), + OrigAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); } else { - SizeInChars = CGF.getTypeSize( - SharedAddresses[N].first.getType().getNonReferenceType()); + SizeInChars = + CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); } Sizes.emplace_back(SizeInChars, Size); @@ -2345,6 +2350,28 @@ FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); break; } + case OMPRTL__kmpc_taskred_modifier_init: { + // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int + // is_ws, int num_data, void *data); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy, + CGM.IntTy, CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, + /*Name=*/"__kmpc_taskred_modifier_init"); + break; + } + case OMPRTL__kmpc_task_reduction_modifier_fini: { + // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, + // int is_ws); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, + /*Name=*/"__kmpc_task_reduction_modifier_fini"); + break; + } case OMPRTL__kmpc_alloc: { // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t // al); omp_allocator_handle_t type is void *. 
@@ -6782,7 +6809,7 @@ RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); - ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars, + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, Data.ReductionCopies, Data.ReductionOps); for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; @@ -6846,6 +6873,22 @@ CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), FlagsLVal.getType()); } + if (Data.IsReductionWithTaskMod) { + // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int + // is_ws, int num, void *data); + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, /*isSigned=*/true); + llvm::Value *Args[] = { + IdentTLoc, GTid, + llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskRedInput.getPointer(), CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args); + } // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); llvm::Value *Args[] = { CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, @@ -6857,6 +6900,22 @@ Args); } +void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, + SourceLocation Loc, + bool IsWorksharingReduction) { + // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int + // gtid, int is_ws); + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, /*isSigned=*/true); + llvm::Value *Args[] = {IdentTLoc, GTid, + llvm::ConstantInt::get(CGM.IntTy, + IsWorksharingReduction ? 
1 : 0, + /*isSigned=*/true)}; + (void)CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args); +} + void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, @@ -12362,6 +12421,12 @@ llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, + SourceLocation Loc, + bool IsWorksharingReduction) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1169,21 +1169,23 @@ SmallVector ReductionOps; SmallVector LHSs; SmallVector RHSs; + OMPTaskDataTy Data; + SmallVector TaskLHSs; + SmallVector TaskRHSs; for (const auto *C : D.getClausesOfKind()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Shareds.emplace_back(Ref); - Privates.emplace_back(*IPriv); - ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + if (C->getModifier() == OMPC_REDUCTION_task) { + Data.ReductionVars.append(C->privates().begin(), C->privates().end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + 
Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } } ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); @@ -1261,6 +1263,117 @@ ++IPriv; ++Count; } + if (!Data.ReductionVars.empty()) { + Data.IsReductionWithTaskMod = true; + Data.IsWorksharingReduction = + isOpenMPWorksharingDirective(D.getDirectiveKind()); + llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); + const Expr *TaskRedRef = nullptr; + switch (D.getDirectiveKind()) { + case OMPD_parallel: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_for: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_sections: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_for: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_master: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_sections: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel_for: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_distribute_parallel_for: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_teams_distribute_parallel_for: + TaskRedRef = cast(D) + .getTaskReductionRefExpr(); + break; + case OMPD_target_teams_distribute_parallel_for: + TaskRedRef = cast(D) + .getTaskReductionRefExpr(); + break; + case OMPD_simd: + case OMPD_for_simd: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_parallel_for_simd: + case OMPD_task: + case OMPD_taskyield: + case OMPD_barrier: + case 
OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: + case OMPD_ordered: + case OMPD_atomic: + case OMPD_teams: + case OMPD_target: + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_target_data: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: + case OMPD_distribute: + case OMPD_target_update: + case OMPD_distribute_parallel_for_simd: + case OMPD_distribute_simd: + case OMPD_target_parallel_for_simd: + case OMPD_target_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_target_teams_distribute_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_declare_simd: + case OMPD_requires: + case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: + case OMPD_unknown: + llvm_unreachable("Unexpected directive with task reductions."); + } + + const auto *VD = cast(cast(TaskRedRef)->getDecl()); + EmitVarDecl(*VD); + EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), + /*Volatile=*/false, TaskRedRef->getType()); + } } void CodeGenFunction::EmitOMPReductionClauseFinal( @@ -1272,14 +1385,23 @@ llvm::SmallVector RHSExprs; llvm::SmallVector ReductionOps; bool HasAtLeastOneReduction = false; + // Set when at least one reduction clause carries the 'task' modifier. + bool IsReductionWithTaskMod = false; for (const auto *C : D.getClausesOfKind()) { HasAtLeastOneReduction = true; Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), 
C->lhs_exprs().end()); RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + IsReductionWithTaskMod = + IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; } if (HasAtLeastOneReduction) { + if (IsReductionWithTaskMod) { + CGM.getOpenMPRuntime().emitTaskReductionFini( + *this, D.getBeginLoc(), + isOpenMPWorksharingDirective(D.getDirectiveKind())); + } bool WithNowait = D.getSingleClause() || isOpenMPParallelDirective(D.getDirectiveKind()) || ReductionKind == OMPD_simd; @@ -3382,21 +3504,13 @@ SmallVector LHSs; SmallVector RHSs; for (const auto *C : S.getClausesOfKind()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( *this, S.getBeginLoc(), LHSs, RHSs, Data); @@ -3776,21 +3890,13 @@ SmallVector RHSs; OMPTaskDataTy Data; for (const auto *C : S.getClausesOfKind()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - 
Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } llvm::Value *ReductionDesc = CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -512,11 +512,15 @@ getTopMostTaskgroupReductionData(const ValueDecl *D, SourceRange &SR, const Expr *&ReductionRef, Expr *&TaskgroupDescriptor) const; - /// Return reduction reference expression for the current taskgroup. + /// Return reduction reference expression for the current taskgroup or + /// parallel/worksharing directives with task reductions. 
Expr *getTaskgroupReductionRef() const { - assert(getTopOfStack().Directive == OMPD_taskgroup && - "taskgroup reference expression requested for non taskgroup " - "directive."); + assert((getTopOfStack().Directive == OMPD_taskgroup || + ((isOpenMPParallelDirective(getTopOfStack().Directive) || + isOpenMPWorksharingDirective(getTopOfStack().Directive)) && + !isOpenMPSimdDirective(getTopOfStack().Directive))) && + "taskgroup reference expression requested for non taskgroup or " + "parallel/worksharing directive."); return getTopOfStack().TaskgroupReductionRef; } /// Checks if the given \p VD declaration is actually a taskgroup reduction @@ -1331,7 +1335,10 @@ "Additional reduction info may be specified only for reduction items."); ReductionData &ReductionData = getTopOfStack().ReductionMap[D]; assert(ReductionData.ReductionRange.isInvalid() && - getTopOfStack().Directive == OMPD_taskgroup && + (getTopOfStack().Directive == OMPD_taskgroup || + ((isOpenMPParallelDirective(getTopOfStack().Directive) || + isOpenMPWorksharingDirective(getTopOfStack().Directive)) && + !isOpenMPSimdDirective(getTopOfStack().Directive))) && "Additional reduction info may be specified only once for reduction " "items."); ReductionData.set(BOK, SR); @@ -1354,7 +1361,10 @@ "Additional reduction info may be specified only for reduction items."); ReductionData &ReductionData = getTopOfStack().ReductionMap[D]; assert(ReductionData.ReductionRange.isInvalid() && - getTopOfStack().Directive == OMPD_taskgroup && + (getTopOfStack().Directive == OMPD_taskgroup || + ((isOpenMPParallelDirective(getTopOfStack().Directive) || + isOpenMPWorksharingDirective(getTopOfStack().Directive)) && + !isOpenMPSimdDirective(getTopOfStack().Directive))) && "Additional reduction info may be specified only once for reduction " "items."); ReductionData.set(ReductionRef, SR); @@ -1375,7 +1385,8 @@ assert(!isStackEmpty() && "Data-sharing attributes stack is empty."); for (const_iterator I = begin() + 1, E = end(); I != E; 
++I) { const DSAInfo &Data = I->SharingMap.lookup(D); - if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup) + if (Data.Attributes != OMPC_reduction || + Data.Modifier != OMPC_REDUCTION_task) continue; const ReductionData &ReductionData = I->ReductionMap.lookup(D); if (!ReductionData.ReductionOp || @@ -1387,8 +1398,8 @@ "expression for the descriptor is not " "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; - return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0); + return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); } return DSAVarData(); } @@ -1400,7 +1411,8 @@ assert(!isStackEmpty() && "Data-sharing attributes stack is empty."); for (const_iterator I = begin() + 1, E = end(); I != E; ++I) { const DSAInfo &Data = I->SharingMap.lookup(D); - if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup) + if (Data.Attributes != OMPC_reduction || + Data.Modifier != OMPC_REDUCTION_task) continue; const ReductionData &ReductionData = I->ReductionMap.lookup(D); if (!ReductionData.ReductionOp || @@ -1412,8 +1424,8 @@ "expression for the descriptor is not " "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; - return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0); + return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); } return DSAVarData(); } @@ -2206,7 +2218,12 @@ // Consider taskgroup reduction descriptor variable a private // to avoid possible capture in the region. 
(DSAStack->hasExplicitDirective( - [](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; }, + [](OpenMPDirectiveKind K) { + return K == OMPD_taskgroup || + ((isOpenMPParallelDirective(K) || + isOpenMPWorksharingDirective(K)) && + !isOpenMPSimdDirective(K)); + }, Level) && DSAStack->isTaskgroupReductionRef(D, Level))) ? OMPC_private @@ -2366,7 +2383,7 @@ /// Check consistency of the reduction clauses. static void checkReductionClauses(Sema &S, DSAStackTy *Stack, - ArrayRef Clauses) { + ArrayRef Clauses) { bool InscanFound = false; SourceLocation InscanLoc; // OpenMP 5.0, 2.19.5.4 reduction Clause, Restrictions. @@ -2380,7 +2397,21 @@ if (RC->getModifier() == OMPC_REDUCTION_inscan) { InscanFound = true; InscanLoc = RC->getModifierLoc(); - break; + continue; + } + if (RC->getModifier() == OMPC_REDUCTION_task) { + // OpenMP 5.0, 2.19.5.4 reduction Clause. + // A reduction clause with the task reduction-modifier may only appear on + // a parallel construct, a worksharing construct or a combined or + // composite construct for which any of the aforementioned constructs is a + // constituent construct and simd or loop are not constituent constructs. + OpenMPDirectiveKind CurDir = Stack->getCurrentDirective(); + if (!(isOpenMPParallelDirective(CurDir) || + isOpenMPWorksharingDirective(CurDir)) || + isOpenMPSimdDirective(CurDir)) + S.Diag(RC->getModifierLoc(), + diag::err_omp_reduction_task_not_parallel_or_worksharing); + continue; } } if (InscanFound) { @@ -4103,7 +4134,8 @@ SmallVector PICs; // This is required for proper codegen. for (OMPClause *Clause : Clauses) { - if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) && + if (!LangOpts.OpenMPSimd && + isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) && Clause->getClauseKind() == OMPC_in_reduction) { // Capture taskgroup task_reduction descriptors inside the tasking regions // with the corresponding in_reduction items. 
@@ -6009,6 +6041,7 @@ setFunctionHasBranchProtectedScope(); return OMPParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } @@ -8492,8 +8525,9 @@ } setFunctionHasBranchProtectedScope(); - return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount, - Clauses, AStmt, B, DSAStack->isCancelRegion()); + return OMPForDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPForSimdDirective( @@ -8570,6 +8604,7 @@ setFunctionHasBranchProtectedScope(); return OMPSectionsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } @@ -8730,9 +8765,9 @@ } setFunctionHasBranchProtectedScope(); - return OMPParallelForDirective::Create(Context, StartLoc, EndLoc, - NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + return OMPParallelForDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPParallelForSimdDirective( @@ -8796,8 +8831,9 @@ setFunctionHasBranchProtectedScope(); - return OMPParallelMasterDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPParallelMasterDirective::Create( + Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef()); } StmtResult @@ -8836,7 +8872,8 @@ setFunctionHasBranchProtectedScope(); return OMPParallelSectionsDirective::Create( - Context, StartLoc, EndLoc, Clauses, AStmt, DSAStack->isCancelRegion()); + Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } /// detach and mergeable clauses are mutially exclusive, check for it. 
@@ -9888,8 +9925,9 @@ setFunctionHasBranchProtectedScope(); - return OMPTargetParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPTargetParallelDirective::Create( + Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef()); } StmtResult Sema::ActOnOpenMPTargetParallelForDirective( @@ -9941,9 +9979,9 @@ } setFunctionHasBranchProtectedScope(); - return OMPTargetParallelForDirective::Create(Context, StartLoc, EndLoc, - NestedLoopCount, Clauses, AStmt, - B, DSAStack->isCancelRegion()); + return OMPTargetParallelForDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } /// Check for existence of a map clause in the list of clauses. @@ -10555,7 +10593,7 @@ setFunctionHasBranchProtectedScope(); return OMPDistributeParallelForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective( @@ -10996,7 +11034,7 @@ return OMPTeamsDistributeParallelForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, @@ -11125,7 +11163,7 @@ setFunctionHasBranchProtectedScope(); return OMPTargetTeamsDistributeParallelForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( @@ -14969,9 +15007,13 @@ } // All reduction items are still marked as reduction (to do not increase // code base size). 
- Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, - RD.RedModifier); - if (CurrDir == OMPD_taskgroup) { + unsigned Modifier = RD.RedModifier; + // Consider task_reductions as reductions with task modifier. Required for + // correct analysis of in_reduction clauses. + if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction) + Modifier = OMPC_REDUCTION_task; + Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier); + if (Modifier == OMPC_REDUCTION_task) { if (DeclareReductionRef.isUsable()) Stack->addTaskgroupReductionData(D, ReductionIdRange, DeclareReductionRef.get()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2308,6 +2308,7 @@ // The NumClauses field was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2317,6 +2318,7 @@ void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2329,6 +2331,7 @@ // The NumClauses field was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2360,6 +2363,7 @@ void ASTStmtReader::VisitOMPParallelForDirective(OMPParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2374,6 +2378,7 @@ // The NumClauses field was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); } void ASTStmtReader::VisitOMPParallelSectionsDirective( @@ -2382,6 +2387,7 @@ // The NumClauses field was read in ReadStmtFromStream. 
Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2489,11 +2495,13 @@ VisitStmt(D); Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); } void ASTStmtReader::VisitOMPTargetParallelForDirective( OMPTargetParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2563,6 +2571,7 @@ void ASTStmtReader::VisitOMPDistributeParallelForDirective( OMPDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2603,6 +2612,7 @@ void ASTStmtReader::VisitOMPTeamsDistributeParallelForDirective( OMPTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2621,6 +2631,7 @@ void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective( OMPTargetTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2195,6 +2195,7 @@ VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_PARALLEL_DIRECTIVE; } @@ -2206,6 +2207,7 @@ void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 
1 : 0); Code = serialization::STMT_OMP_FOR_DIRECTIVE; } @@ -2219,6 +2221,7 @@ VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_SECTIONS_DIRECTIVE; } @@ -2253,6 +2256,7 @@ void ASTStmtWriter::VisitOMPParallelForDirective(OMPParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_PARALLEL_FOR_DIRECTIVE; } @@ -2268,6 +2272,7 @@ VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Code = serialization::STMT_OMP_PARALLEL_MASTER_DIRECTIVE; } @@ -2276,6 +2281,7 @@ VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE; } @@ -2336,12 +2342,14 @@ VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Code = serialization::STMT_OMP_TARGET_PARALLEL_DIRECTIVE; } void ASTStmtWriter::VisitOMPTargetParallelForDirective( OMPTargetParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE; } @@ -2475,6 +2483,7 @@ void ASTStmtWriter::VisitOMPDistributeParallelForDirective( OMPDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 
1 : 0); Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } @@ -2523,6 +2532,7 @@ void ASTStmtWriter::VisitOMPTeamsDistributeParallelForDirective( OMPTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } @@ -2543,6 +2553,7 @@ void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective( OMPTargetTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t 
-verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp target teams +#pragma omp distribute parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load
i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel +#pragma omp for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void 
(i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8**
[[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void 
(i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load
i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel master reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load
i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/parallel_reduction_messages.c b/clang/test/OpenMP/parallel_reduction_messages.c --- a/clang/test/OpenMP/parallel_reduction_messages.c +++ b/clang/test/OpenMP/parallel_reduction_messages.c @@ -10,7 +10,7 @@ ; #pragma omp parallel reduction(default, // expected-error {{expected identifier}} expected-error {{expected ')'}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-note {{to match this '('}} ; -#pragma omp parallel reduction(unknown, +: a) // expected-error {{expected 'default' or 'inscan' in OpenMP clause 'reduction'}} +#pragma omp parallel reduction(unknown, +: a) // expected-error {{expected 'default', 'inscan' or 'task' in OpenMP clause 'reduction'}} ; #pragma omp parallel reduction(default, + : a) ; diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s 
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label 
%[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. +// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x 
%struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// 
CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32
%{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -0,0 +1,133 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel sections reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc,
argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load
i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -0,0 +1,134 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel +#pragma omp sections reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8**
[[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp target parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load 
i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { // Test input: combined 'target parallel' (no loop) whose reduction clause uses the 'task' modifier over argc and the argv[0:10][0:argc] section; the nested task participates through in_reduction on the same items. +#pragma omp target parallel reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = 
getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load 
i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { // Test input: combined 'target teams distribute parallel for' whose reduction clause uses the 'task' modifier over argc and the argv[0:10][0:argc] section; the nested task participates through in_reduction on the same items. +#pragma omp target teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp 
task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* 
[[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: 
call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp 
b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { // Test input: a separate 'target' directive wrapping 'teams distribute parallel for' whose reduction clause uses the 'task' modifier over argc and the argv[0:10][0:argc] section; the nested task participates through in_reduction on the same items. +#pragma omp target +#pragma omp teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* 
[[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: 
call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif