Index: clang/include/clang/AST/StmtOpenMP.h =================================================================== --- clang/include/clang/AST/StmtOpenMP.h +++ clang/include/clang/AST/StmtOpenMP.h @@ -281,6 +281,15 @@ return Data->getClauses(); } + /// Was this directive mapped from another directive? + /// e.g. 1) omp loop bind(parallel) is mapped to OMPD_for + /// 2) omp loop bind(teams) is mapped to OMPD_distribute + /// 3) omp loop bind(thread) is mapped to OMPD_simd + /// It was necessary to note it down in the Directive because of + /// clang::TreeTransform::TransformOMPExecutableDirective() pass in + /// the frontend. + OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown; + protected: /// Data, associated with the directive. OMPChildren *Data = nullptr; @@ -345,6 +354,10 @@ return Inst; } + void setMappedDirective(OpenMPDirectiveKind MappedDirective) { + PrevMappedDirective = MappedDirective; + } + public: /// Iterates over expressions/statements used in the construct. class used_clauses_child_iterator @@ -598,6 +611,8 @@ "Expected directive with the associated statement."); return Data->getRawStmt(); } + + OpenMPDirectiveKind getMappedDirective() const { return PrevMappedDirective; } }; /// This represents '#pragma omp parallel' directive. @@ -1604,7 +1619,9 @@ SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs); + const HelperExprs &Exprs, + OpenMPDirectiveKind ParamPrevMappedDirective = + llvm::omp::Directive::OMPD_unknown); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -1682,7 +1699,9 @@ SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, const HelperExprs &Exprs, - Expr *TaskRedRef, bool HasCancel); + Expr *TaskRedRef, bool HasCancel, + OpenMPDirectiveKind ParamPrevMappedDirective = + llvm::omp::Directive::OMPD_unknown); /// Creates an empty directive with the place /// for \a NumClauses clauses. 
@@ -4406,7 +4425,9 @@ static OMPDistributeDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs); + Stmt *AssociatedStmt, const HelperExprs &Exprs, + OpenMPDirectiveKind ParamPrevMappedDirective = + llvm::omp::Directive::OMPD_unknown); /// Creates an empty directive with the place /// for \a NumClauses clauses. Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9859,6 +9859,11 @@ def warn_loop_ctrl_binds_to_inner : Warning< "'%0' is bound to current loop, GCC binds it to the enclosing loop">, InGroup; +def err_omp_bind_required_on_loop : Error< + "expected 'bind' clause for 'loop' construct without an enclosing OpenMP " + "construct">; +def err_omp_loop_reduction_clause : Error< + "'reduction' clause not allowed with '#pragma omp loop bind(teams)'">; def warn_break_binds_to_switch : Warning< "'break' is bound to loop, GCC binds it to switch">, InGroup; Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -11162,6 +11162,23 @@ /// All `omp assumes` we encountered so far. SmallVector OMPAssumeGlobal; + /// OMPD_loop is mapped to OMPD_for, OMPD_distribute or OMPD_simd depending + /// on the parameter of the bind clause. In the methods for the + /// mapped directives, check the parameters of the lastprivate clause. + bool checkLastPrivateForMappedDirectives(ArrayRef Clauses); + /// Depending on the bind clause of OMPD_loop map the directive to new + /// directives. 
+ /// 1) loop bind(parallel) --> OMPD_for + /// 2) loop bind(teams) --> OMPD_distribute + /// 3) loop bind(thread) --> OMPD_simd + /// This is being handled in Sema instead of Codegen because of the need for + /// rigorous semantic checking in the new mapped directives. + bool mapLoopConstruct(llvm::SmallVector &ClausesWithoutBind, + ArrayRef Clauses, + OpenMPBindClauseKind BindKind, + OpenMPDirectiveKind &Kind, + OpenMPDirectiveKind &PrevMappedDirective); + public: /// The declarator \p D defines a function in the scope \p S which is nested /// in an `omp begin/end declare variant` scope. In this method we create a @@ -11457,7 +11474,8 @@ StmtResult ActOnOpenMPExecutableDirective( OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName, OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, + OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown); /// Called on well-formed '\#pragma omp parallel' after parsing /// of the associated statement. 
StmtResult ActOnOpenMPParallelDirective(ArrayRef Clauses, Index: clang/lib/AST/StmtOpenMP.cpp =================================================================== --- clang/lib/AST/StmtOpenMP.cpp +++ clang/lib/AST/StmtOpenMP.cpp @@ -297,11 +297,10 @@ /*NumChildren=*/1); } -OMPSimdDirective * -OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation EndLoc, unsigned CollapsedNum, - ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs) { +OMPSimdDirective *OMPSimdDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs, OpenMPDirectiveKind ParamPrevMappedDirective) { auto *Dir = createDirective( C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_simd), StartLoc, EndLoc, CollapsedNum); @@ -321,6 +320,7 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setMappedDirective(ParamPrevMappedDirective); return Dir; } @@ -336,7 +336,8 @@ OMPForDirective *OMPForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel, + OpenMPDirectiveKind ParamPrevMappedDirective) { auto *Dir = createDirective( C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_for) + 1, StartLoc, EndLoc, CollapsedNum); @@ -366,6 +367,7 @@ Dir->setPreInits(Exprs.PreInits); Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); + Dir->setMappedDirective(ParamPrevMappedDirective); return Dir; } @@ -1515,7 +1517,7 @@ OMPDistributeDirective *OMPDistributeDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - 
const HelperExprs &Exprs) { + const HelperExprs &Exprs, OpenMPDirectiveKind ParamPrevMappedDirective) { auto *Dir = createDirective( C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_distribute), StartLoc, EndLoc, @@ -1544,6 +1546,7 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setMappedDirective(ParamPrevMappedDirective); return Dir; } Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -163,6 +163,10 @@ SourceLocation DefaultAttrLoc; DefaultmapInfo DefaultmapMap[OMPC_DEFAULTMAP_unknown]; OpenMPDirectiveKind Directive = OMPD_unknown; + /// GenericLoopDirective with bind clause is mapped to other directives, + /// like for, distribute and simd. Presently, set MappedDirective to + /// OMPLoop. This may also be used in a similar way for other constructs. + OpenMPDirectiveKind MappedDirective = OMPD_unknown; DeclarationNameInfo DirectiveName; Scope *CurScope = nullptr; DeclContext *Context = nullptr; @@ -636,6 +640,24 @@ const SharingMapTy *Top = getTopOfStackOrNull(); return Top ? Top->Directive : OMPD_unknown; } + OpenMPDirectiveKind getMappedDirective() const { + const SharingMapTy *Top = getTopOfStackOrNull(); + return Top ? Top->MappedDirective : OMPD_unknown; + } + void setCurrentDirective(OpenMPDirectiveKind NewDK) { + SharingMapTy *Top = getTopOfStackOrNull(); + assert(Top && + "Before calling setCurrentDirective Top of Stack not to be NULL."); + // Assign argument NewDK to Directive; the previous value is not saved here. + Top->Directive = NewDK; + } + void setMappedDirective(OpenMPDirectiveKind NewDK) { + SharingMapTy *Top = getTopOfStackOrNull(); + assert(Top && + "Before calling setMappedDirective Top of Stack not to be NULL."); + // Assign argument NewDK to MappedDirective. 
+ Top->MappedDirective = NewDK; + } /// Returns directive kind at specified level. OpenMPDirectiveKind getDirective(unsigned Level) const { assert(!isStackEmpty() && "No directive at specified level."); @@ -5679,7 +5701,8 @@ // the step size, rounding-up the effective upper bound ensures that the // last iteration is included. // Note that the rounding-up may cause an overflow in a temporry that - // could be avoided, but would have occurred in a C-style for-loop as well. + // could be avoided, but would have occurred in a C-style for-loop as + // well. Expr *Divisor = BuildVarRef(NewStep); if (Rel == BO_GE || Rel == BO_GT) Divisor = @@ -6086,10 +6109,95 @@ } } +bool Sema::mapLoopConstruct(llvm::SmallVector &ClausesWithoutBind, + ArrayRef Clauses, + OpenMPBindClauseKind BindKind, + OpenMPDirectiveKind &Kind, + OpenMPDirectiveKind &PrevMappedDirective) { + + bool UseClausesWithoutBind = false; + + // Restricting to "#pragma omp loop bind" + if (getLangOpts().OpenMP >= 50 && Kind == OMPD_loop) { + if (BindKind == OMPC_BIND_unknown) { + // Setting the enclosing teams or parallel construct for the loop + // directive without bind clause. + BindKind = OMPC_BIND_thread; // Default bind(thread) if binding is unknown + + const OpenMPDirectiveKind ParentDirective = + DSAStack->getParentDirective(); + if (ParentDirective == OMPD_unknown) { + Diag(DSAStack->getDefaultDSALocation(), + diag::err_omp_bind_required_on_loop); + } else if (ParentDirective == OMPD_parallel || + ParentDirective == OMPD_target_parallel) { + BindKind = OMPC_BIND_parallel; + } else if (ParentDirective == OMPD_teams || + ParentDirective == OMPD_target_teams) { + BindKind = OMPC_BIND_teams; + } + } else { + // bind clause is present, so we should set flag indicating to only + // use the clauses that aren't the bind clause for the new directive that + // loop is lowered to. 
+ UseClausesWithoutBind = true; + } + + for (OMPClause *C : Clauses) { + // Spec restriction : bind(teams) and reduction not permitted. + if (BindKind == OMPC_BIND_teams && + C->getClauseKind() == llvm::omp::Clause::OMPC_reduction) + Diag(DSAStack->getDefaultDSALocation(), + diag::err_omp_loop_reduction_clause); + + // A new Vector ClausesWithoutBind, which does not contain the bind + // clause, for passing to new directive. + if (C->getClauseKind() != llvm::omp::Clause::OMPC_bind) + ClausesWithoutBind.push_back(C); + } + + switch (BindKind) { + case OMPC_BIND_parallel: + Kind = OMPD_for; + DSAStack->setCurrentDirective(OMPD_for); + DSAStack->setMappedDirective(OMPD_loop); + PrevMappedDirective = OMPD_loop; + break; + case OMPC_BIND_teams: + Kind = OMPD_distribute; + DSAStack->setCurrentDirective(OMPD_distribute); + DSAStack->setMappedDirective(OMPD_loop); + PrevMappedDirective = OMPD_loop; + break; + case OMPC_BIND_thread: + Kind = OMPD_simd; + DSAStack->setCurrentDirective(OMPD_simd); + DSAStack->setMappedDirective(OMPD_loop); + PrevMappedDirective = OMPD_loop; + break; + case OMPC_BIND_unknown: + break; + } + } else if (PrevMappedDirective == OMPD_loop) { + /// An initial pass after recognizing all the statements is done in the + /// Parser when the directive OMPD_loop is mapped to OMPD_for, + /// OMPD_distribute or OMPD_simd. A second transform pass with call from + /// clang::TreeTransform::TransformOMPExecutableDirective() is done + /// with the Directive as one of the above mapped directive without + /// the bind clause. Then "PrevMappedDirective" stored in the + /// OMPExecutableDirective is accessed and hence this else statement. 
+ + DSAStack->setMappedDirective(OMPD_loop); + } + + return UseClausesWithoutBind; +} + StmtResult Sema::ActOnOpenMPExecutableDirective( OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName, OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { + Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, + OpenMPDirectiveKind PrevMappedDirective) { StmtResult Res = StmtError(); OpenMPBindClauseKind BindKind = OMPC_BIND_unknown; if (const OMPBindClause *BC = @@ -6106,10 +6214,21 @@ isOpenMPTargetDataManagementDirective(Kind))) Diag(StartLoc, diag::warn_hip_omp_target_directives); + llvm::SmallVector ClausesWithoutBind; + bool UseClausesWithoutBind = false; + + UseClausesWithoutBind = mapLoopConstruct(ClausesWithoutBind, Clauses, + BindKind, Kind, PrevMappedDirective); + llvm::SmallVector ClausesWithImplicit; VarsWithInheritedDSAType VarsWithInheritedDSA; bool ErrorFound = false; - ClausesWithImplicit.append(Clauses.begin(), Clauses.end()); + if (getLangOpts().OpenMP >= 50 && UseClausesWithoutBind) { + ClausesWithImplicit.append(ClausesWithoutBind.begin(), + ClausesWithoutBind.end()); + } else { + ClausesWithImplicit.append(Clauses.begin(), Clauses.end()); + } if (AStmt && !CurContext->isDependentContext() && Kind != OMPD_atomic && Kind != OMPD_critical && Kind != OMPD_section && Kind != OMPD_master && Kind != OMPD_masked && !isOpenMPLoopTransformationDirective(Kind)) { @@ -9203,9 +9322,13 @@ auto *CXXFor = dyn_cast_or_null(S); // Ranged for is supported only in OpenMP 5.0. if (!For && (SemaRef.LangOpts.OpenMP <= 45 || !CXXFor)) { + OpenMPDirectiveKind DK = (SemaRef.getLangOpts().OpenMP < 50 || + DSA.getMappedDirective() == OMPD_unknown) + ? 
DKind + : DSA.getMappedDirective(); SemaRef.Diag(S->getBeginLoc(), diag::err_omp_not_for) << (CollapseLoopCountExpr != nullptr || OrderedLoopCountExpr != nullptr) - << getOpenMPDirectiveName(DKind) << TotalNestedLoopCount + << getOpenMPDirectiveName(DK) << TotalNestedLoopCount << (CurrentNestedLoopCount > 0) << CurrentNestedLoopCount; if (TotalNestedLoopCount > 1) { if (CollapseLoopCountExpr && OrderedLoopCountExpr) @@ -10320,6 +10443,24 @@ return false; } +static bool checkGenericLoopLastprivate(Sema &S, ArrayRef Clauses, + OpenMPDirectiveKind K, + DSAStackTy *Stack); + +bool Sema::checkLastPrivateForMappedDirectives(ArrayRef Clauses) { + + // Check for syntax of lastprivate + // Param of the lastprivate have different meanings in the mapped directives + // e.g. "omp loop" Only loop iteration vars are allowed in lastprivate clause + // "omp for" lastprivate vars must be shared + if (getLangOpts().OpenMP >= 50 && + DSAStack->getMappedDirective() == OMPD_loop && + checkGenericLoopLastprivate(*this, Clauses, OMPD_loop, DSAStack)) { + return false; + } + return true; +} + StmtResult Sema::ActOnOpenMPSimdDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, @@ -10327,6 +10468,9 @@ if (!AStmt) return StmtError(); + if (!checkLastPrivateForMappedDirectives(Clauses)) + return StmtError(); + assert(isa(AStmt) && "Captured statement expected"); OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will @@ -10355,8 +10499,10 @@ return StmtError(); setFunctionHasBranchProtectedScope(); - return OMPSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount, - Clauses, AStmt, B); + auto *SimdDirective = OMPSimdDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getMappedDirective()); + return SimdDirective; } StmtResult @@ -10366,6 +10512,9 @@ if (!AStmt) return StmtError(); + if (!checkLastPrivateForMappedDirectives(Clauses)) + return 
StmtError(); + assert(isa(AStmt) && "Captured statement expected"); OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' or 'ordered' with number of loops, it will @@ -10390,10 +10539,11 @@ } } - setFunctionHasBranchProtectedScope(); - return OMPForDirective::Create( + auto *ForDirective = OMPForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion(), + DSAStack->getMappedDirective()); + return ForDirective; } StmtResult Sema::ActOnOpenMPForSimdDirective( @@ -13940,6 +14090,9 @@ if (!AStmt) return StmtError(); + if (!checkLastPrivateForMappedDirectives(Clauses)) + return StmtError(); + assert(isa(AStmt) && "Captured statement expected"); OMPLoopBasedDirective::HelperExprs B; // In presence of clause 'collapse' with number of loops, it will @@ -13955,8 +14108,10 @@ "omp for loop exprs were not built"); setFunctionHasBranchProtectedScope(); - return OMPDistributeDirective::Create(Context, StartLoc, EndLoc, - NestedLoopCount, Clauses, AStmt, B); + auto *DistributeDirective = OMPDistributeDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getMappedDirective()); + return DistributeDirective; } StmtResult Sema::ActOnOpenMPDistributeParallelForDirective( Index: clang/lib/Sema/TreeTransform.h =================================================================== --- clang/lib/Sema/TreeTransform.h +++ clang/lib/Sema/TreeTransform.h @@ -1645,14 +1645,15 @@ /// /// By default, performs semantic analysis to build the new statement. /// Subclasses may override this routine to provide different behavior. 
- StmtResult RebuildOMPExecutableDirective(OpenMPDirectiveKind Kind, - DeclarationNameInfo DirName, - OpenMPDirectiveKind CancelRegion, - ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { + StmtResult RebuildOMPExecutableDirective( + OpenMPDirectiveKind Kind, DeclarationNameInfo DirName, + OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, + OpenMPDirectiveKind PrevMappedDirective = OMPD_unknown) { + return getSema().ActOnOpenMPExecutableDirective( - Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc); + Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc, + PrevMappedDirective); } /// Build a new OpenMP 'if' clause. @@ -8819,7 +8820,8 @@ return getDerived().RebuildOMPExecutableDirective( D->getDirectiveKind(), DirName, CancelRegion, TClauses, - AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc()); + AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc(), + D->getMappedDirective()); } template Index: clang/test/OpenMP/generic_loop_ast_print.cpp =================================================================== --- clang/test/OpenMP/generic_loop_ast_print.cpp +++ clang/test/OpenMP/generic_loop_ast_print.cpp @@ -23,7 +23,7 @@ //PRINT: template void templ_foo(T t) { //PRINT: T j, z; -//PRINT: #pragma omp loop collapse(C) reduction(+: z) lastprivate(j) bind(thread) +//PRINT: #pragma omp simd collapse(C) reduction(+: z) lastprivate(j) //PRINT: for (T i = 0; i < t; ++i) //PRINT: for (j = 0; j < t; ++j) //PRINT: z += i + j; @@ -31,20 +31,19 @@ //DUMP: FunctionTemplateDecl{{.*}}templ_foo //DUMP: TemplateTypeParmDecl{{.*}}T //DUMP: NonTypeTemplateParmDecl{{.*}}C -//DUMP: OMPGenericLoopDirective +//DUMP: OMPSimdDirective //DUMP: OMPCollapseClause //DUMP: DeclRefExpr{{.*}}'C' 'int' //DUMP: OMPReductionClause //DUMP: DeclRefExpr{{.*}}'z' 'T' //DUMP: OMPLastprivateClause //DUMP: DeclRefExpr{{.*}}'j' 'T' -//DUMP: OMPBindClause //DUMP: ForStmt //DUMP: 
ForStmt //PRINT: template<> void templ_foo(int t) { //PRINT: int j, z; -//PRINT: #pragma omp loop collapse(2) reduction(+: z) lastprivate(j) bind(thread) +//PRINT: #pragma omp simd collapse(2) reduction(+: z) lastprivate(j) //PRINT: for (int i = 0; i < t; ++i) //PRINT: for (j = 0; j < t; ++j) //PRINT: z += i + j; @@ -53,7 +52,7 @@ //DUMP: TemplateArgument type 'int' //DUMP: TemplateArgument integral 2 //DUMP: ParmVarDecl{{.*}}'int':'int' -//DUMP: OMPGenericLoopDirective +//DUMP: OMPSimdDirective //DUMP: OMPCollapseClause //DUMP: ConstantExpr{{.*}}'int' //DUMP: value: Int 2 @@ -61,7 +60,6 @@ //DUMP: DeclRefExpr{{.*}}'z' 'int':'int' //DUMP: OMPLastprivateClause //DUMP: DeclRefExpr{{.*}}'j' 'int':'int' -//DUMP: OMPBindClause //DUMP: ForStmt template void templ_foo(T t) { @@ -82,12 +80,12 @@ int aaa[1000]; //PRINT: #pragma omp target teams distribute parallel for map(tofrom: MTX) - //PRINT: #pragma omp loop + //PRINT: #pragma omp simd //DUMP: OMPTargetTeamsDistributeParallelForDirective //DUMP: CapturedStmt //DUMP: ForStmt //DUMP: CompoundStmt - //DUMP: OMPGenericLoopDirective + //DUMP: OMPSimdDirective #pragma omp target teams distribute parallel for map(MTX) for (auto i = 0; i < N; ++i) { #pragma omp loop @@ -97,11 +95,11 @@ } //PRINT: #pragma omp target teams - //PRINT: #pragma omp loop + //PRINT: #pragma omp distribute //DUMP: OMPTargetTeamsDirective //DUMP: CapturedStmt //DUMP: ForStmt - //DUMP: OMPGenericLoopDirective + //DUMP: OMPDistributeDirective #pragma omp target teams for (int i=0; i<1000; ++i) { #pragma omp loop @@ -111,8 +109,8 @@ } int j, z, z1; - //PRINT: #pragma omp loop collapse(2) private(z) lastprivate(j) order(concurrent) reduction(+: z1) bind(parallel) - //DUMP: OMPGenericLoopDirective + //PRINT: #pragma omp for collapse(2) private(z) lastprivate(j) order(concurrent) reduction(+: z1) + //DUMP: OMPForDirective //DUMP: OMPCollapseClause //DUMP: IntegerLiteral{{.*}}2 //DUMP: OMPPrivateClause @@ -122,7 +120,6 @@ //DUMP: OMPOrderClause //DUMP: 
OMPReductionClause //DUMP-NEXT: DeclRefExpr{{.*}}'z1' - //DUMP: OMPBindClause //DUMP: ForStmt //DUMP: ForStmt #pragma omp loop collapse(2) private(z) lastprivate(j) order(concurrent) \ @@ -136,10 +133,9 @@ } //PRINT: #pragma omp target teams - //PRINT: #pragma omp loop bind(teams) + //PRINT: #pragma omp distribute //DUMP: OMPTargetTeamsDirective - //DUMP: OMPGenericLoopDirective - //DUMP: OMPBindClause + //DUMP: OMPDistributeDirective //DUMP: ForStmt #pragma omp target teams #pragma omp loop bind(teams) @@ -147,11 +143,10 @@ //PRINT: #pragma omp target //PRINT: #pragma omp teams - //PRINT: #pragma omp loop bind(teams) + //PRINT: #pragma omp distribute //DUMP: OMPTargetDirective //DUMP: OMPTeamsDirective - //DUMP: OMPGenericLoopDirective - //DUMP: OMPBindClause + //DUMP: OMPDistributeDirective //DUMP: ForStmt #pragma omp target #pragma omp teams @@ -159,17 +154,6 @@ for (auto i = 0; i < N; ++i) { } } -//PRINT: void nobindingfunc() { -//DUMP: FunctionDecl {{.*}}nobindingfunc 'void ()' -void nobindingfunc() -{ - //PRINT: #pragma omp loop - //DUMP: OMPGenericLoopDirective - //DUMP: ForStmt - #pragma omp loop - for (int i=0; i<10; ++i) { } -} - void bar() { templ_foo(8); Index: clang/test/OpenMP/generic_loop_codegen.cpp =================================================================== --- clang/test/OpenMP/generic_loop_codegen.cpp +++ clang/test/OpenMP/generic_loop_codegen.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name tmp2 --version 2 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s // RUN: 
%clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR @@ -20,98 +20,242 @@ z += i+j; } #endif -// IR-LABEL: define {{[^@]+}}@_Z3fooi +// IR-LABEL: define dso_local void {{[_A-Za-z0-9?@]*}}foo{{[@A-Za-z]*}} // IR-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] { // IR-NEXT: entry: // IR-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 // IR-NEXT: [[Z:%.*]] = alloca i32, align 4 -// IR-NEXT: [[I1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP2TMP1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// IR-NEXT: [[I8:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J9:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-NEXT: [[Z13:%.*]] = alloca i32, align 4 // IR-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4 -// IR-NEXT: store i32 0, ptr [[I1]], align 4 -// IR-NEXT: br label [[FOR_COND:%.*]] -// IR: for.cond: -// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4 +// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 // IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4 -// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] -// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]] -// IR: for.body: -// IR-NEXT: store i32 0, ptr [[J]], align 4 -// IR-NEXT: br label [[FOR_COND2:%.*]] -// IR: for.cond2: -// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4 -// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4 -// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]] -// IR-NEXT: br i1 [[CMP3]], 
label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]] -// IR: for.body4: -// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4 -// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4 -// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] -// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4 -// IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]] -// IR-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4 -// IR-NEXT: br label [[FOR_INC:%.*]] -// IR: for.inc: -// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4 -// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4 -// IR-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]] -// IR: for.end: -// IR-NEXT: br label [[FOR_INC6:%.*]] -// IR: for.inc6: -// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4 -// IR-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1 -// IR-NEXT: store i32 [[INC7]], ptr [[I1]], align 4 -// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] -// IR: for.end8: +// IR-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP3]], 0 +// IR-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// IR-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// IR-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// IR-NEXT: store i32 0, ptr [[I8]], align 4 +// IR-NEXT: store i32 0, ptr [[J9]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label 
[[SIMD_IF_END:%.*]] +// IR: land.lhs.true: +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP5]] +// IR-NEXT: br i1 [[CMP10]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END]] +// IR: simd.if.then: +// IR-NEXT: store i64 0, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: store i32 0, ptr [[Z13]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// IR-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP7]], 1 +// IR-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP6]], [[ADD]] +// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP9]], 0 +// IR-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// IR-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// IR-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// IR-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP8]], [[CONV18]] +// IR-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// IR-NEXT: [[ADD21:%.*]] = add nsw i64 0, [[MUL20]] +// IR-NEXT: [[CONV22:%.*]] = trunc i64 [[ADD21]] to i32 +// IR-NEXT: store i32 [[CONV22]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP12]], 0 +// IR-NEXT: 
[[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 +// IR-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]] +// IR-NEXT: [[CONV26:%.*]] = sext i32 [[MUL25]] to i64 +// IR-NEXT: [[DIV27:%.*]] = sdiv i64 [[TMP11]], [[CONV26]] +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[SUB28:%.*]] = sub nsw i32 [[TMP13]], 0 +// IR-NEXT: [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1 +// IR-NEXT: [[MUL30:%.*]] = mul nsw i32 1, [[DIV29]] +// IR-NEXT: [[CONV31:%.*]] = sext i32 [[MUL30]] to i64 +// IR-NEXT: [[MUL32:%.*]] = mul nsw i64 [[DIV27]], [[CONV31]] +// IR-NEXT: [[SUB33:%.*]] = sub nsw i64 [[TMP10]], [[MUL32]] +// IR-NEXT: [[MUL34:%.*]] = mul nsw i64 [[SUB33]], 1 +// IR-NEXT: [[ADD35:%.*]] = add nsw i64 0, [[MUL34]] +// IR-NEXT: [[CONV36:%.*]] = trunc i64 [[ADD35]] to i32 +// IR-NEXT: store i32 [[CONV36]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP16]], [[ADD37]] +// IR-NEXT: store i32 [[ADD38]], ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP17]], 1 +// IR-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// IR: omp.inner.for.end: +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB40:%.*]] 
= sub nsw i32 [[TMP18]], 0 +// IR-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// IR-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// IR-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// IR-NEXT: store i32 [[ADD43]], ptr [[I11]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP19]], 0 +// IR-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 +// IR-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 +// IR-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] +// IR-NEXT: store i32 [[ADD47]], ptr [[J]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[Z]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[Z13]], align 4 +// IR-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// IR-NEXT: store i32 [[ADD48]], ptr [[Z]], align 4 +// IR-NEXT: br label [[SIMD_IF_END]] +// IR: simd.if.end: // IR-NEXT: ret void // // -// IR-PCH-LABEL: define {{[^@]+}}@_Z3fooi +// IR-PCH-LABEL: define dso_local void {{[_A-Za-z0-9?@]*}}foo{{[@A-Za-z]*}} // IR-PCH-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] { // IR-PCH-NEXT: entry: // IR-PCH-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: [[Z:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[I1:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP2TMP1:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[I8:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J9:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[Z13:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: store i32 [[T]], ptr 
[[T_ADDR]], align 4 -// IR-PCH-NEXT: store i32 0, ptr [[I1]], align 4 -// IR-PCH-NEXT: br label [[FOR_COND:%.*]] -// IR-PCH: for.cond: -// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 // IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4 -// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] -// IR-PCH-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]] -// IR-PCH: for.body: -// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 -// IR-PCH-NEXT: br label [[FOR_COND2:%.*]] -// IR-PCH: for.cond2: -// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4 -// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4 -// IR-PCH-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]] -// IR-PCH-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]] -// IR-PCH: for.body4: -// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4 -// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4 -// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] -// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4 -// IR-PCH-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]] -// IR-PCH-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4 -// IR-PCH-NEXT: br label [[FOR_INC:%.*]] -// IR-PCH: for.inc: -// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4 -// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4 -// IR-PCH-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]] -// IR-PCH: for.end: -// IR-PCH-NEXT: br label [[FOR_INC6:%.*]] -// IR-PCH: for.inc6: -// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4 -// IR-PCH-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1 -// IR-PCH-NEXT: store i32 [[INC7]], ptr [[I1]], align 4 -// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] -// IR-PCH: for.end8: +// 
IR-PCH-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP3]], 0 +// IR-PCH-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// IR-PCH-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// IR-PCH-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-PCH-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[I8]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[J9]], align 4 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[SIMD_IF_END:%.*]] +// IR-PCH: land.lhs.true: +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP5]] +// IR-PCH-NEXT: br i1 [[CMP10]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END]] +// IR-PCH: simd.if.then: +// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[Z13]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// IR-PCH-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP7]], 1 +// IR-PCH-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP6]], [[ADD]] +// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// 
IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP9]], 0 +// IR-PCH-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// IR-PCH-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// IR-PCH-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// IR-PCH-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP8]], [[CONV18]] +// IR-PCH-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// IR-PCH-NEXT: [[ADD21:%.*]] = add nsw i64 0, [[MUL20]] +// IR-PCH-NEXT: [[CONV22:%.*]] = trunc i64 [[ADD21]] to i32 +// IR-PCH-NEXT: store i32 [[CONV22]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP12]], 0 +// IR-PCH-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 +// IR-PCH-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]] +// IR-PCH-NEXT: [[CONV26:%.*]] = sext i32 [[MUL25]] to i64 +// IR-PCH-NEXT: [[DIV27:%.*]] = sdiv i64 [[TMP11]], [[CONV26]] +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[SUB28:%.*]] = sub nsw i32 [[TMP13]], 0 +// IR-PCH-NEXT: [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1 +// IR-PCH-NEXT: [[MUL30:%.*]] = mul nsw i32 1, [[DIV29]] +// IR-PCH-NEXT: [[CONV31:%.*]] = sext i32 [[MUL30]] to i64 +// IR-PCH-NEXT: [[MUL32:%.*]] = mul nsw i64 [[DIV27]], [[CONV31]] +// IR-PCH-NEXT: [[SUB33:%.*]] = sub nsw i64 [[TMP10]], [[MUL32]] +// IR-PCH-NEXT: [[MUL34:%.*]] = mul nsw i64 [[SUB33]], 1 +// IR-PCH-NEXT: [[ADD35:%.*]] = add nsw i64 0, [[MUL34]] +// IR-PCH-NEXT: 
[[CONV36:%.*]] = trunc i64 [[ADD35]] to i32 +// IR-PCH-NEXT: store i32 [[CONV36]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP16]], [[ADD37]] +// IR-PCH-NEXT: store i32 [[ADD38]], ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP17]], 1 +// IR-PCH-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP18]], 0 +// IR-PCH-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// IR-PCH-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// IR-PCH-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// IR-PCH-NEXT: store i32 [[ADD43]], ptr [[I11]], align 4 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-PCH-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP19]], 0 +// IR-PCH-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 +// IR-PCH-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 +// IR-PCH-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] +// IR-PCH-NEXT: store i32 [[ADD47]], ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[Z]], align 4 +// IR-PCH-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[Z13]], align 4 +// IR-PCH-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// IR-PCH-NEXT: store i32 [[ADD48]], ptr [[Z]], align 4 +// IR-PCH-NEXT: br label [[SIMD_IF_END]] +// IR-PCH: simd.if.end: // IR-PCH-NEXT: ret void // Index: clang/test/OpenMP/loop_bind_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/loop_bind_codegen.cpp @@ -0,0 +1,133 @@ +// expected-no-diagnostics +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s + + +#define NNN 50 +int aaa[NNN]; + +void parallel_loop() { + #pragma omp parallel + { + #pragma omp loop bind(parallel) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } +} + +void parallel_loop_orphan() { + #pragma omp loop bind(parallel) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } +} + + +void teams_loop() { + #pragma omp teams + { + #pragma omp loop bind(teams) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } +} + +void thread_loop() { + #pragma omp parallel + { + #pragma omp loop bind(thread) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } +} + +void thread_loop_orphan() { + #pragma omp loop bind(thread) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } +} + +int main() { + parallel_loop(); + parallel_loop_orphan(); + teams_loop(); + thread_loop(); + thread_loop_orphan(); + + return 0; +} +// CHECK-LABEL: define dso_local void @{{.+}}parallel_loop +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr {{.+}}parallel_loop{{.+}}.omp_outlined{{.*}}) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}parallel_loop{{.+}}.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: omp.inner.for.body: +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini +// CHECK-NEXT: call void @__kmpc_barrier +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void {{.+}}parallel_loop_orphan{{.+}} +// CHECK-NEXT: entry: +// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num +// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: omp.inner.for.body: +// CHECK: omp.inner.for.end: +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini +// CHECK-NEXT: call void @__kmpc_barrier +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void {{.+}}teams_loop{{.+}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr {{.+}}teams_loop{{.+}}.omp_outlined{{.*}}) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}teams_loop{{.+}}.omp_outlined{{.+}} +// CHECK-NEXT: entry: +// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: omp.inner.for.body: +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini +// +// +// CHECK-LABEL: define dso_local void {{.+}}thread_loop{{.+}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr {{.+}}thread_loop{{.+}}.omp_outlined{{.*}}) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}thread_loop{{.+}}.omp_outlined{{.+}} +// CHECK-NEXT: entry: +// CHECK: omp.inner.for.body: +// CHECK: omp.inner.for.end: +// +// +// CHECK-LABEL: define dso_local void {{.+}}thread_loop_orphan{{.+}} +// CHECK-NEXT: entry: +// CHECK: omp.inner.for.cond: +// CHECK: omp.inner.for.body: +// CHECK: omp.inner.for.end: +// +// +// CHECK-LABEL: define {{.+}}main{{.+}} +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK: call void {{.+}}parallel_loop{{.+}}() +// CHECK-NEXT: call void {{.+}}parallel_loop_orphan{{.+}}() +// CHECK-NEXT: call void {{.+}}teams_loop{{.+}}() +// CHECK-NEXT: call void {{.+}}thread_loop{{.+}}() +// CHECK-NEXT: call void {{.+}}thread_loop_orphan{{.+}}() +// CHECK-NEXT: ret i32 0 +// Index: clang/test/OpenMP/loop_bind_enclosed.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/loop_bind_enclosed.cpp @@ -0,0 +1,190 @@ +// expected-no-diagnostics +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s + +#define NNN 50 +int aaa[NNN]; + +void parallel_taskgroup_loop() { + #pragma omp parallel + { + #pragma omp taskgroup + for (int i = 0 ; i < 2 ; i++) { + #pragma omp loop + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } + } +} + +void parallel_taskwait_loop() { + #pragma omp parallel + { + #pragma omp taskwait + for (int i = 0 ; i < 2 ; i++) { + #pragma omp loop + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } + } +} + +void parallel_single_loop() { + #pragma omp parallel + { + for (int i = 0 ; i < 2 ; i++) { + #pragma omp single + #pragma omp loop + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } + } +} + +void 
parallel_order_loop() { + #pragma omp parallel + { + #pragma omp for order(concurrent) + { + for (int i = 0 ; i < 2 ; i++) { + #pragma omp loop + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } + } + } +} + + +void parallel_cancel_loop(bool flag) { + #pragma omp ordered + for (int i = 0 ; i < 2 ; i++) { + #pragma omp parallel + { + #pragma omp cancel parallel if(flag) + aaa[0] = 0; + #pragma omp loop bind(parallel) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } + } +} + +int +main(int argc, char *argv[]) { + parallel_taskgroup_loop(); + parallel_taskwait_loop(); + parallel_single_loop(); + parallel_order_loop(); + parallel_cancel_loop(true); + parallel_cancel_loop(false); + + return 0; +} +// CHECK-LABEL: define dso_local void {{.+}}parallel_taskgroup_loop{{.+}} { +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr {{.+}}parallel_taskgroup_loop{{.+}}.omp_outlined{{.*}} +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}parallel_taskgroup_loop{{.+}}.omp_outlined{{.+}} { +// CHECK: call void @__kmpc_taskgroup +// CHECK: for.body: +// CHECK: omp.inner.for.cond: +// CHECK: omp.inner.for.body: +// CHECK: omp.inner.for.inc: +// CHECK: omp.inner.for.end: +// CHECK: for.end: +// CHECK: call void @__kmpc_end_taskgroup +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void {{.+}}parallel_taskwait_loop{{.+}} { +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr {{.+}}parallel_taskwait_loop{{.+}}.omp_outlined{{.*}}) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}parallel_taskwait_loop{{.+}}.omp_outlined{{.+}} { +// CHECK: [[TMP2:%.*]] = call i32 @__kmpc_omp_taskwait +// CHECK: for.cond: +// CHECK: for.body: +// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: omp.inner.for.cond: +// CHECK: omp.inner.for.body: +// CHECK: omp.body.continue: +// CHECK: omp.inner.for.inc: +// CHECK: omp.inner.for.end: +// CHECK: omp.loop.exit: +// CHECK: call void @__kmpc_for_static_fini +// CHECK: call void @__kmpc_barrier +// CHECK: for.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void {{.+}}parallel_single_loop{{.+}} { +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr {{.+}}parallel_single_loop{{.+}}.omp_outlined{{.*}}) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}parallel_single_loop{{.+}}.omp_outlined{{.+}} { +// CHECK: for.body: +// CHECK: [[TMP3:%.*]] = call i32 @__kmpc_single +// CHECK: omp.inner.for.end: +// CHECK: call void @__kmpc_end_single +// CHECK: omp_if.end: +// CHECK: call void @__kmpc_barrier +// CHECK: for.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void {{.+}}parallel_order_loop{{.+}} { +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr {{.+}}parallel_order_loop{{.+}}.omp_outlined{{.*}}) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}parallel_order_loop{{.+}}.omp_outlined{{.+}} { +// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: omp.inner.for.body: +// CHECK: omp.loop.exit: +// CHECK: call void @__kmpc_for_static_fini +// CHECK: call void @__kmpc_barrier +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void {{.+}}parallel_cancel_loop{{.+}} { +// CHECK: [[FLAG_ADDR:%.*]] = alloca i8, +// CHECK: call void @__kmpc_ordered +// CHECK: for.body: +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr {{.+}}parallel_cancel_loop{{.+}}.omp_outlined{{.*}}, ptr [[FLAG_ADDR]]) +// CHECK: for.end: +// CHECK: call void @__kmpc_end_ordered +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void {{.+}}parallel_cancel_loop{{.+}}.omp_outlined{{.+}} { +// CHECK: omp_if.then: +// CHECK: [[TMP4:%.*]] = call i32 @__kmpc_cancel +// CHECK: .cancel.exit: +// CHECK: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier +// CHECK: omp_if.end: +// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: omp.inner.for.body: +// CHECK: omp.loop.exit: +// CHECK: call void @__kmpc_for_static_fini +// CHECK: [[TMP24:%.*]] = call i32 @__kmpc_cancel_barrier +// CHECK: .cancel.continue5: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local noundef i32 @main{{.+}} { +// CHECK: call void {{.+}}parallel_taskgroup_loop{{.+}}() +// CHECK-NEXT: call void {{.+}}parallel_taskwait_loop{{.+}}() +// CHECK-NEXT: call void {{.+}}parallel_single_loop{{.+}}() +// CHECK-NEXT: call void {{.+}}parallel_order_loop{{.+}}() +// CHECK-NEXT: call void {{.+}}parallel_cancel_loop{{.+}}(i1 noundef zeroext true) +// CHECK-NEXT: call void {{.+}}parallel_cancel_loop{{.+}}(i1 noundef zeroext false) +// Index: clang/test/OpenMP/loop_bind_messages.cpp 
=================================================================== --- /dev/null +++ clang/test/OpenMP/loop_bind_messages.cpp @@ -0,0 +1,76 @@ +#ifndef HEADER +#define HEADER +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -verify %s + +#define NNN 50 +int aaa[NNN]; + +void parallel_loop() { + #pragma omp parallel + { + #pragma omp loop + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } +} + +void teams_loop() { + int var1, var2; + + #pragma omp teams + { + #pragma omp loop bind(teams) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + + #pragma omp loop bind(teams) collapse(2) private(var1) + for (int i = 0 ; i < 3 ; i++) { + for (int j = 0 ; j < NNN ; j++) { + var1 += aaa[j]; + } + } + } +} + +void orphan_loop_with_bind() { + #pragma omp loop bind(parallel) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } +} + +void orphan_loop_no_bind() { + #pragma omp loop // expected-error{{expected 'bind' clause for 'loop' construct without an enclosing OpenMP construct}} + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } +} + +void teams_loop_reduction() { + int total = 0; + + #pragma omp teams + { + #pragma omp loop bind(teams) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + + #pragma omp loop bind(teams) reduction(+:total) // expected-error{{'reduction' clause not allowed with '#pragma omp loop bind(teams)'}} + for (int j = 0 ; j < NNN ; j++) { + total+=aaa[j]; + } + } +} + +int main(int argc, char *argv[]) { + parallel_loop(); + teams_loop(); + orphan_loop_with_bind(); + orphan_loop_no_bind(); + teams_loop_reduction(); +} + +#endif Index: clang/test/OpenMP/nested_loop_codegen.cpp =================================================================== --- clang/test/OpenMP/nested_loop_codegen.cpp +++ clang/test/OpenMP/nested_loop_codegen.cpp @@ -58,6 +58,12 @@ // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca 
ptr, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 @@ -66,35 +72,27 @@ // CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 -// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]] // CHECK1: for.body: -// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK1-NEXT: br label [[FOR_COND1:%.*]] -// CHECK1: for.cond1: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 5 -// CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]] -// CHECK1: for.body3: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[K]], align 4 -// CHECK1-NEXT: br label [[FOR_INC:%.*]] -// CHECK1: for.inc: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC4]], ptr [[K]], align 4 -// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP3:![0-9]+]] -// CHECK1: for.end: -// CHECK1-NEXT: br label [[FOR_INC5:%.*]] -// CHECK1: for.inc5: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] -// CHECK1: for.end7: -// CHECK1-NEXT: ret void +// CHECK1-NEXT [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT call void @__kmpc_for_static_init_4(ptr @1, i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +//CHECK1 cond.end: +//CHECK1 omp.inner.for.cond: +//CHECK1 omp.inner.for.body: +//CHECK1 omp.body.continue: +//CHECK1 omp.inner.for.inc: +//CHECK1 omp.inner.for.end: +//CHECK1 omp.loop.exit: +// CHECK1-NEXT [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT call void @__kmpc_for_static_fini(ptr @1, i32 [[TMP14]]) +// CHECK1-NEXT [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT call void @__kmpc_barrier(ptr @2, i32 [[TMP16]]) +//CHECK1 for.inc: +//CHECK1 for.end: +// CHECK1-NEXT ret void +// // // // CHECK1-LABEL: define {{[^@]+}}@_Z11inline_declv @@ -114,45 +112,36 @@ // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label [[FOR_COND:%.*]] -// CHECK1: for.cond: -// CHECK1-NEXT: [[TMP2:%.*]] 
= load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 -// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]] -// CHECK1: for.body: -// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK1-NEXT: br label [[FOR_COND1:%.*]] -// CHECK1: for.cond1: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP3]], 5 -// CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]] -// CHECK1: for.body3: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: br label [[FOR_INC:%.*]] -// CHECK1: for.inc: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[INC4]], ptr [[K]], align 4 -// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK1: for.end: -// CHECK1-NEXT: br label [[FOR_INC5:%.*]] -// CHECK1: for.inc5: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] -// CHECK1: for.end7: -// CHECK1-NEXT: ret void +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1: for.cond: +// CHECK1: for.body: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @1, i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1: omp.inner.for.cond: +// CHECK1: omp.inner.for.body: +// CHECK1: omp.body.continue: +// CHECK1: omp.inner.for.inc: +// CHECK1: omp.inner.for.end: +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @2, i32 [[TMP16]]) +// CHECK1: for.inc: +// CHECK1: for.end: +// CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_Z12outline_declv @@ -173,6 +162,12 @@ // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] @@ -180,39 +175,24 @@ // CHECK2-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META29:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG32:![0-9]+]] -// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG33:![0-9]+]] -// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG35:![0-9]+]] -// CHECK2: for.cond: -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG36:![0-9]+]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10, !dbg [[DBG38:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG39:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]] -// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG44:![0-9]+]] -// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg [[DBG46:![0-9]+]] -// CHECK2: for.cond1: -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG47:![0-9]+]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 5, !dbg [[DBG49:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG50:![0-9]+]] -// CHECK2: for.body3: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG51:![0-9]+]] -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1, !dbg [[DBG51]] -// CHECK2-NEXT: store i32 [[INC]], ptr [[K]], align 4, !dbg [[DBG51]] -// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG53:![0-9]+]] +// CHECK2: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !50 +// CHECK2: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg !50 +// CHECK2: call void @__kmpc_for_static_init_4(ptr @1, i32 [[TMP3]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg !51 +// CHECK2: omp.inner.for.cond: +// CHECK2: omp.inner.for.body: +// CHECK2: omp.body.continue: +// CHECK2: omp.inner.for.inc: +// CHECK2: omp.inner.for.end: +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !51 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg !51 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @3, i32 [[TMP13]]), !dbg !58 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !58 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg !58 +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @4, i32 [[TMP15]]), !dbg !58 // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG54]] -// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG54]] -// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG55:![0-9]+]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG59:![0-9]+]] -// CHECK2: for.inc5: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG60:![0-9]+]] -// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG60]] -// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4, !dbg [[DBG60]] -// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG61:![0-9]+]], !llvm.loop [[LOOP62:![0-9]+]] -// CHECK2: for.end7: // CHECK2-NEXT: ret void, !dbg [[DBG64:![0-9]+]] // // @@ -255,6 +235,12 @@ // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]] @@ -273,32 +259,31 @@ // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG97:![0-9]+]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG98:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]] -// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG103]] -// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg [[DBG104:![0-9]+]] -// CHECK2: for.cond1: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP3]], 5, !dbg [[DBG107:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG108:![0-9]+]] -// CHECK2: for.body3: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG109:![0-9]+]] -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG109]] -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4, !dbg [[DBG109]] -// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG111:![0-9]+]] +// CHECK2: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG103:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG103:![0-9]+]] +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @8, i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg 
[[DBG103:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG103:![0-9]+]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP5]], 4, !dbg [[DBG103:![0-9]+]] +// CHECK2: omp.inner.for.cond: +// CHECK2: omp.inner.for.body: +// CHECK2: omp.body.continue: +// CHECK2: omp.inner.for.inc: +// CHECK2: omp.inner.for.end: +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !111 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg !111 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @10, i32 [[TMP14]]), !dbg !118 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !118 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !dbg !118 +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @11, i32 [[TMP16]]), !dbg !118 +// CHECK2-NEXT: br label [[FOR_INC]], !dbg !119 // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG112:![0-9]+]] -// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG112]] -// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG112]] -// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG113:![0-9]+]], !llvm.loop [[LOOP114:![0-9]+]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG112:![0-9]+]] +// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP17]], 1, !dbg [[DBG112]] +// CHECK2-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4, !dbg [[DBG112]] +// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG113:![0-9]+]], !llvm.loop [[LOOP114:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG116:![0-9]+]] -// CHECK2: for.inc5: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG117:![0-9]+]] -// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG117]] -// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4, !dbg [[DBG117]] -// CHECK2-NEXT: br label [[FOR_COND]], 
!dbg [[DBG118:![0-9]+]], !llvm.loop [[LOOP119:![0-9]+]] -// CHECK2: for.end7: -// CHECK2-NEXT: ret void, !dbg [[DBG121:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG114:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv.omp_outlined @@ -362,10 +347,14 @@ // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: // CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: br label [[FOR_COND:%.*]] +// CHECK3-NEXT: br label [[FOR_COND:]] // CHECK3: for.cond: // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 @@ -387,26 +376,35 @@ // CHECK3-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 // CHECK3-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] // CHECK3: omp_loop.preheader: +// CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] +// 
CHECK3-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 // CHECK3-NEXT: br label [[OMP_LOOP_HEADER:%.*]] // CHECK3: omp_loop.header: // CHECK3-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] // CHECK3-NEXT: br label [[OMP_LOOP_COND:%.*]] // CHECK3: omp_loop.cond: -// CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]] // CHECK3-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp_loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @2, i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK3-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK3: omp_loop.after: -// CHECK3-NEXT: br label [[FOR_INC:%.*]] // CHECK3: for.inc: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK3-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK3: omp_loop.body: -// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_K]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]] +// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_K]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[LOADGEP_K]], align 4 // CHECK3-NEXT: br label [[OMP_LOOP_INC]] // CHECK3: omp_loop.inc: @@ -513,20 +511,16 @@ // CHECK3-NEXT: 
[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: -// CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: br label [[FOR_COND:%.*]] // CHECK3: for.cond: -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 -// CHECK3-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK3: for.end: -// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK3: for.body: // CHECK3-NEXT: store i32 0, ptr [[K]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 @@ -538,31 +532,15 @@ // CHECK3-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 // CHECK3-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] // CHECK3: omp_loop.preheader: -// CHECK3-NEXT: br label [[OMP_LOOP_HEADER:%.*]] // CHECK3: omp_loop.header: // CHECK3-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] // CHECK3-NEXT: br label [[OMP_LOOP_COND:%.*]] // CHECK3: omp_loop.cond: -// CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] -// CHECK3-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp_loop.exit: -// CHECK3-NEXT: br label 
[[OMP_LOOP_AFTER:%.*]] // CHECK3: omp_loop.after: -// CHECK3-NEXT: br label [[FOR_INC:%.*]] // CHECK3: for.inc: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK3-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4 -// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp_loop.body: -// CHECK3-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4 -// CHECK3-NEXT: br label [[OMP_LOOP_INC]] // CHECK3: omp_loop.inc: -// CHECK3-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 -// CHECK3-NEXT: br label [[OMP_LOOP_HEADER]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -665,22 +643,18 @@ // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]] -// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG23]] // CHECK4: for.cond: -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG25:![0-9]+]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG25]] -// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG23]] // CHECK4: for.end: -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg 
[[DBG27:![0-9]+]] // CHECK4: omp.par.region.parallel.after: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG27]] // CHECK4: for.body: -// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_K]], align 4, !dbg [[DBG28:![0-9]+]] +// CHECK4: store i32 0, ptr [[LOADGEP_K]], align 4, !dbg [[DBG28:![0-9]+]] // CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG28]] // CHECK4-NEXT: store ptr [[LOADGEP_K]], ptr [[TMP3]], align 8, !dbg [[DBG28]] // CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG28]] @@ -690,31 +664,28 @@ // CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG28]] // CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG28]] // CHECK4: omp_loop.preheader: +// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG28]] +// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG28]] +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG28]] +// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG28]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @3), !dbg [[DBG28]] +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(ptr @3, i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG28]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG28]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG28]] +// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[DBG28]] +// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[DBG28]] // CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG28]] // CHECK4: omp_loop.header: -// 
CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG28]] -// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG28]] // CHECK4: omp_loop.cond: -// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]], !dbg [[DBG28]] -// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG28]] // CHECK4: omp_loop.exit: +// CHECK4: call void @__kmpc_for_static_fini(ptr @3, i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG28]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @3), !dbg [[DBG33:![0-9]+]] +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @4, i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG33]] // CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG28]] // CHECK4: omp_loop.after: -// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG33:![0-9]+]] // CHECK4: for.inc: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG25]] -// CHECK4-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG25]] -// CHECK4-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG25]] -// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG25]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp_loop.body: -// CHECK4-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG28]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_K]], align 4, !dbg [[DBG36:![0-9]+]] -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG36]] -// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_K]], align 4, !dbg [[DBG36]] -// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG28]] // CHECK4: omp_loop.inc: -// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG28]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG28]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -826,57 +797,29 
@@ // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG86:![0-9]+]] -// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG86]] // CHECK4: for.cond: -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88:![0-9]+]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG88]] -// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG86]] // CHECK4: for.end: -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG90:![0-9]+]] // CHECK4: omp.par.region.parallel.after: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG90]] // CHECK4: for.body: -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG95:![0-9]+]] -// CHECK4-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG95]] -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG95]] -// CHECK4-NEXT: store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[DBG95]] -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG95]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG96:![0-9]+]] -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG95]] +// CHECK4: store 
i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG95:![0-9]+]] // CHECK4-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG95]] -// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG95]] -// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG95]] // CHECK4: omp_loop.preheader: -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG95]] // CHECK4: omp_loop.header: -// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG95]] -// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG95]] // CHECK4: omp_loop.cond: -// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]], !dbg [[DBG95]] -// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG95]] // CHECK4: omp_loop.exit: -// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG95]] // CHECK4: omp_loop.after: -// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG97:![0-9]+]] // CHECK4: for.inc: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]] -// CHECK4-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG88]] -// CHECK4-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]] -// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP98:![0-9]+]] // CHECK4: omp_loop.body: -// CHECK4-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG95]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg [[DBG99:![0-9]+]] -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG99]] -// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG99]] -// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG95]] +// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV:%.*]], [[TMP7:%.*]], !dbg [[DBG98:![0-9]+]] +// CHECK4: call void 
@__captured_stmt.3(ptr [[K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG96:![0-9]+]] // CHECK4: omp_loop.inc: -// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG95]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG95]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void //