Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -392,9 +392,11 @@
     CombinedConditionOffset = 25,
     CombinedNextLowerBoundOffset = 26,
     CombinedNextUpperBoundOffset = 27,
+    CombinedDistConditionOffset = 28,
+    CombinedParForInDistConditionOffset = 29,
     // Offset to the end (and start of the following counters/updates/finals
     // arrays) for combined distribute loop directives.
-    CombinedDistributeEnd = 28,
+    CombinedDistributeEnd = 30,
   };
 
   /// Get the counters storage.
@@ -605,6 +607,17 @@
            "expected loop bound sharing directive");
     *std::next(child_begin(), CombinedNextUpperBoundOffset) = CombNUB;
   }
+  void setCombinedDistCond(Expr *CombDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(), CombinedDistConditionOffset) = CombDistCond;
+  }
+  void setCombinedParForInDistCond(Expr *CombParForInDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(),
+               CombinedParForInDistConditionOffset) = CombParForInDistCond;
+  }
   void setCounters(ArrayRef<Expr *> A);
   void setPrivateCounters(ArrayRef<Expr *> A);
   void setInits(ArrayRef<Expr *> A);
@@ -637,6 +650,13 @@
     /// Update of UpperBound for statically scheduled omp loops for
     /// outer loop in combined constructs (e.g. 'distribute parallel for')
     Expr *NUB;
+    /// Distribute loop condition used when composing 'omp distribute'
+    /// with 'omp for' in the same construct when the schedule is chunked.
+    Expr *DistCond;
+    /// 'omp parallel for' loop condition used when composed with
+    /// 'omp distribute' in the same construct and when the schedule is
+    /// chunked and the chunk size is 1.
+    Expr *ParForInDistCond;
   };
 
   /// The expressions built for the OpenMP loop CodeGen for the
@@ -754,6 +774,8 @@
       DistCombinedFields.Cond = nullptr;
       DistCombinedFields.NLB = nullptr;
       DistCombinedFields.NUB = nullptr;
+      DistCombinedFields.DistCond = nullptr;
+      DistCombinedFields.ParForInDistCond = nullptr;
     }
   };
 
@@ -922,6 +944,18 @@
     return const_cast<Expr *>(reinterpret_cast<const Expr *>(
         *std::next(child_begin(), CombinedNextUpperBoundOffset)));
   }
+  Expr *getCombinedDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedDistConditionOffset)));
+  }
+  Expr *getCombinedParForInDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedParForInDistConditionOffset)));
+  }
   const Stmt *getBody() const {
     // This relies on the loop form is already checked by Sema.
     const Stmt *Body =
Index: include/clang/Basic/OpenMPKinds.h
===================================================================
--- include/clang/Basic/OpenMPKinds.h
+++ include/clang/Basic/OpenMPKinds.h
@@ -125,6 +125,7 @@
   OpenMPScheduleClauseKind Schedule = OMPC_SCHEDULE_unknown;
   OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
   OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
+  bool HasChunkOne = false;
 };
 
 OpenMPDirectiveKind getOpenMPDirectiveKind(llvm::StringRef Str);
Index: lib/AST/StmtOpenMP.cpp
===================================================================
--- lib/AST/StmtOpenMP.cpp
+++ lib/AST/StmtOpenMP.cpp
@@ -1079,6 +1079,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1145,6 +1147,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
@@ -1457,6 +1461,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
@@ -1524,6 +1530,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1670,6 +1678,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1741,6 +1751,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -890,6 +890,20 @@
   virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
                                   bool Chunked) const;
 
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
   /// Check if the specified \a ScheduleKind is dynamic.
   /// This kind of worksharing directive is emitted without outer loop.
   /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1505,7 +1519,7 @@
   /// Choose default schedule type and chunk value for the
   /// schedule clause.
   virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
-      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
+      const OMPLoopDirective &S, OpenMPScheduleTy &ScheduleKind,
       llvm::Value *&Chunk) const {}
 
   /// Emits call of the outlined function with the provided arguments,
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3292,6 +3292,18 @@
   return Schedule == OMP_dist_sch_static;
 }
 
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                                      bool Chunked) const {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+  return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  return Schedule == OMP_dist_sch_static_chunked;
+}
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
   OpenMPSchedType Schedule =
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -347,7 +347,7 @@
   /// Choose a default value for the schedule clause.
   void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
-      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
+      const OMPLoopDirective &S, OpenMPScheduleTy &ScheduleKind,
       llvm::Value *&Chunk) const override;
 
 private:
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4246,9 +4246,10 @@
 void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
     CodeGenFunction &CGF, const OMPLoopDirective &S,
-    OpenMPScheduleClauseKind &ScheduleKind,
+    OpenMPScheduleTy &ScheduleKind,
     llvm::Value *&Chunk) const {
-  ScheduleKind = OMPC_SCHEDULE_static;
+  ScheduleKind.Schedule = OMPC_SCHEDULE_static;
+  ScheduleKind.HasChunkOne = true;
   Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize(
       S.getIterationVariable()->getType()), 1);
 }
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2006,7 +2006,7 @@
   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                               StaticInit);
   // for combined 'distribute' and 'for' the increment expression of distribute
-  // is store in DistInc. For 'distribute' alone, it is in Inc.
+  // is stored in DistInc. For 'distribute' alone, it is in Inc.
   Expr *IncExpr;
   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
     IncExpr = S.getDistInc();
@@ -2313,7 +2313,7 @@
     } else {
       // Default behaviour for schedule clause.
       CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
-          *this, S, ScheduleKind.Schedule, Chunk);
+          *this, S, ScheduleKind, Chunk);
     }
     const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
     const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -2357,6 +2357,42 @@
                                                      S.getDirectiveKind());
       };
       OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
+    } else if (RT.isStaticChunked(ScheduleKind.Schedule,
+                                  /* Chunked */ Chunk != nullptr) &&
+               ScheduleKind.HasChunkOne &&
+               isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+      if (isOpenMPSimdDirective(S.getDirectiveKind()))
+        EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+      CGOpenMPRuntime::StaticRTInput StaticInit(
+          IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
+          UB.getAddress(), ST.getAddress(), Chunk);
+      RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
+                           ScheduleKind, StaticInit);
+      JumpDest LoopExit =
+          getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
+      // IV = LB;
+      EmitIgnoredExpr(S.getInit());
+
+      // Generate the following loop:
+      //
+      // while (IV <= PrevUB) {
+      //   BODY;
+      //   IV += ST;
+      // }
+      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCombinedParForInDistCond(),
+                       S.getDistInc(),
+                       [&S, LoopExit](CodeGenFunction &CGF) {
+                         CGF.EmitOMPLoopBody(S, LoopExit);
+                         CGF.EmitStopPoint(&S);
+                       },
+                       [&](CodeGenFunction &) {});
+      EmitBlock(LoopExit.getBlock());
+      // Tell the runtime we are done.
+      auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+        CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
+                                                       S.getDirectiveKind());
+      };
+      OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
     } else {
       const bool IsMonotonic =
           Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
@@ -3370,7 +3406,7 @@
               ? S.getCombinedCond()
               : S.getCond();
-      // for distribute alone, codegen
+      // for distribute alone, codegen
       //    while (idx <= UB) { BODY; ++idx; }
       // when combined with 'for' (e.g. as in 'distribute parallel for')
       //    while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
@@ -3382,6 +3418,53 @@
       EmitBlock(LoopExit.getBlock());
       // Tell the runtime we are done.
       RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
+    } else if (RT.isStaticChunked(ScheduleKind,
+                                  /* Chunked */ Chunk != nullptr) &&
+               isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+      // Generate a single loop when 'distribute' is used in combination with
+      // other worksharing directives such as 'parallel for'.
+      if (isOpenMPSimdDirective(S.getDirectiveKind()))
+        EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+      CGOpenMPRuntime::StaticRTInput StaticInit(
+          IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), LB.getAddress(),
+          UB.getAddress(), ST.getAddress(), Chunk);
+      RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
+                                  StaticInit);
+
+      JumpDest LoopExit =
+          getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
+      // UB = min(UB, GlobalUB);
+      EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+
+      // IV = LB;
+      EmitIgnoredExpr(S.getCombinedInit());
+
+      // IV < GlobalUB;
+      const Expr *Cond = S.getCombinedDistCond();
+
+      // Generate the following loop:
+      //
+      // while (IV <= GlobalUB) {
+      //   <CodeGen rest of pragma>(LB, UB);
+      //   LB += ST;
+      //   UB += ST;
+      //   UB = min(UB, GlobalUB);
+      //   IV = LB;
+      // }
+      //
+      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
+                       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+                         CodeGenLoop(CGF, S, LoopExit);
+                       },
+                       [&S](CodeGenFunction &CGF) {
+                         CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+                         CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+                         CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+                         CGF.EmitIgnoredExpr(S.getCombinedInit());
+                       });
+      EmitBlock(LoopExit.getBlock());
+      // Tell the runtime we are done.
+      RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
     } else {
       // Emit the outer loop, which requests its work chunk [LB..UB] from
       // runtime and runs the inner loop to process it.
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -354,7 +354,7 @@
       return OMPD_unknown;
     return std::next(Stack.back().first.rbegin())->Directive;
   }
-
+
   /// Add requires decl to internal vector
   void addRequiresDecl(OMPRequiresDecl *RD) {
     RequiresDecls.push_back(RD);
@@ -381,7 +381,7 @@
     }
     return IsDuplicate;
   }
-
+
   /// Set default data sharing attribute to none.
   void setDefaultDSANone(SourceLocation Loc) {
     assert(!isStackEmpty());
@@ -5201,6 +5201,12 @@
           ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
           : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
                                NumIterations.get());
+  ExprResult CombDistCond;
+  if (isOpenMPLoopBoundSharingDirective(DKind)) {
+    CombDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), LastIteration.get());
+  }
+
   ExprResult CombCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     CombCond =
@@ -5275,7 +5281,7 @@
   // on PrevUB instead of NumIterations - used to implement 'for' when found
   // in combination with 'distribute', like in 'distribute parallel for'
   SourceLocation DistIncLoc = AStmt->getBeginLoc();
-  ExprResult DistCond, DistInc, PrevEUB;
+  ExprResult DistCond, DistInc, PrevEUB, ParForInDistCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
     assert(DistCond.isUsable() && "distribute cond expr was not built");
@@ -5298,6 +5304,11 @@
     PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(),
                                  CondOp.get());
     PrevEUB = SemaRef.ActOnFinishFullExpr(PrevEUB.get());
+
+    // Build IV <= PrevUB, to be used in 'parallel for' when it is combined
+    // with a 'distribute' directive with schedule(static, 1).
+    ParForInDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), PrevUB.get());
   }
   // Build updates and final values of the loop counters.
@@ -5421,6 +5432,8 @@ Built.DistCombinedFields.Cond = CombCond.get(); Built.DistCombinedFields.NLB = CombNextLB.get(); Built.DistCombinedFields.NUB = CombNextUB.get(); + Built.DistCombinedFields.DistCond = CombDistCond.get(); + Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get(); return NestedLoopCount; } Index: lib/Serialization/ASTReaderStmt.cpp =================================================================== --- lib/Serialization/ASTReaderStmt.cpp +++ lib/Serialization/ASTReaderStmt.cpp @@ -1856,6 +1856,8 @@ D->setCombinedCond(Record.readSubExpr()); D->setCombinedNextLowerBound(Record.readSubExpr()); D->setCombinedNextUpperBound(Record.readSubExpr()); + D->setCombinedDistCond(Record.readSubExpr()); + D->setCombinedParForInDistCond(Record.readSubExpr()); } SmallVector Sub; unsigned CollapsedNum = D->getCollapsedNumber(); Index: lib/Serialization/ASTWriterStmt.cpp =================================================================== --- lib/Serialization/ASTWriterStmt.cpp +++ lib/Serialization/ASTWriterStmt.cpp @@ -1854,6 +1854,8 @@ Record.AddStmt(D->getCombinedCond()); Record.AddStmt(D->getCombinedNextLowerBound()); Record.AddStmt(D->getCombinedNextUpperBound()); + Record.AddStmt(D->getCombinedDistCond()); + Record.AddStmt(D->getCombinedParForInDistCond()); } for (auto I : D->counters()) { Record.AddStmt(I); Index: test/OpenMP/distribute_parallel_for_codegen.cpp =================================================================== --- test/OpenMP/distribute_parallel_for_codegen.cpp +++ test/OpenMP/distribute_parallel_for_codegen.cpp @@ -407,18 +407,16 @@ a[i] = b[i] + c[i]; // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca + // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca - // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, - // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] - // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: // check EUB for distribute // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] // LAMBDA-DAG: [[EUB_TRUE]]: @@ -437,18 +435,9 @@ // check exit condition // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]], // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - - // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: - // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] - - // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: - // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], - // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] - // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] + // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label 
%[[DIST_INNER_LOOP_END:.+]] // check that PrevLB and PrevUB are passed to the 'for' // LAMBDA: [[DIST_INNER_LOOP_BODY]]: @@ -467,13 +456,6 @@ // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] - - // LAMBDA: [[DIST_INNER_LOOP_END]]: - // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] - - // LAMBDA: [[DIST_OUTER_LOOP_INC]]: - // check NextLB and NextUB // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] @@ -482,10 +464,31 @@ // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], - // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] - // outer loop exit - // LAMBDA: [[DIST_OUTER_LOOP_END]]: + // Update UB + // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], + // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]], + // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] + // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] + // LAMBDA-DAG: [[EUB_TRUE_1]]: + // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], + // LAMBDA: br label %[[EUB_END_1:.+]] + // LAMBDA-DAG: [[EUB_FALSE_1]]: + // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: br label %[[EUB_END_1]] + // LAMBDA-DAG: [[EUB_END_1]]: + // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] + // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], + + // Store LB in IV + // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], + // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], + + // LAMBDA: [[DIST_INNER_LOOP_END]]: + // LAMBDA: br label %[[LOOP_EXIT:.+]] + + // loop exit + // LAMBDA: [[LOOP_EXIT]]: // LAMBDA-DAG: call void @__kmpc_for_static_fini( // LAMBDA: ret @@ -1155,18 +1158,17 @@ a[i] = b[i] + c[i]; // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca + // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, - // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] - // CHECK: [[DIST_OUTER_LOOP_HEADER]]: // check EUB for distribute // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, + // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] // CHECK-DAG: [[EUB_TRUE]]: @@ -1185,18 +1187,9 @@ // check exit condition // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]], // CHECK: [[CMP_IV_UB:%.+]] = icmp sle 
{{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - - // CHECK: [[DIST_OUTER_LOOP_BODY]]: - // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] - - // CHECK: [[DIST_INNER_LOOP_HEADER]]: - // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], - // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] - // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] + // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] // check that PrevLB and PrevUB are passed to the 'for' // CHECK: [[DIST_INNER_LOOP_BODY]]: @@ -1215,13 +1208,6 @@ // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] - - // CHECK: [[DIST_INNER_LOOP_END]]: - // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] - - // CHECK: [[DIST_OUTER_LOOP_INC]]: - // check NextLB and NextUB // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] @@ -1230,10 +1216,31 @@ // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], - // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] - // outer loop exit - // CHECK: [[DIST_OUTER_LOOP_END]]: + // Update UB + // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], + // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]], + // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] + // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] + // CHECK-DAG: [[EUB_TRUE_1]]: + // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], + // CHECK: br label %[[EUB_END_1:.+]] + // CHECK-DAG: [[EUB_FALSE_1]]: + // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: br label %[[EUB_END_1]] + // CHECK-DAG: [[EUB_END_1]]: + // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] + // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], + + // Store LB in IV + // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], + // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], + + // CHECK: [[DIST_INNER_LOOP_END]]: + // CHECK: br label %[[LOOP_EXIT:.+]] + + // loop exit + // CHECK: [[LOOP_EXIT]]: // CHECK-DAG: call void @__kmpc_for_static_fini( // CHECK: ret @@ -1868,18 +1875,17 @@ // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca +// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, -// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] -// CHECK: [[DIST_OUTER_LOOP_HEADER]]: // check EUB for distribute // CHECK-DAG: 
[[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, +// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] // CHECK-DAG: [[EUB_TRUE]]: @@ -1898,18 +1904,9 @@ // check exit condition // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], +// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]], // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] -// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - -// CHECK: [[DIST_OUTER_LOOP_BODY]]: -// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] - -// CHECK: [[DIST_INNER_LOOP_HEADER]]: -// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], -// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] -// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] +// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] // check that PrevLB and PrevUB are passed to the 'for' // CHECK: [[DIST_INNER_LOOP_BODY]]: @@ -1928,13 +1925,6 @@ // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[DIST_INNER_LOOP_HEADER]] - -// CHECK: [[DIST_INNER_LOOP_END]]: -// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] - -// CHECK: [[DIST_OUTER_LOOP_INC]]: -// check NextLB and NextUB // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] @@ -1943,10 +1933,31 @@ // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], -// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] -// outer loop exit -// CHECK: [[DIST_OUTER_LOOP_END]]: +// Update UB +// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], +// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]], +// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] +// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] +// CHECK-DAG: [[EUB_TRUE_1]]: +// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], +// CHECK: br label %[[EUB_END_1:.+]] +// CHECK-DAG: [[EUB_FALSE_1]]: +// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: br label %[[EUB_END_1]] +// CHECK-DAG: [[EUB_END_1]]: +// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] +// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], + +// Store LB in IV +// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], +// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], + +// CHECK: [[DIST_INNER_LOOP_END]]: +// CHECK: br label %[[LOOP_EXIT:.+]] + +// loop exit +// CHECK: [[LOOP_EXIT]]: // CHECK-DAG: call void @__kmpc_for_static_fini( // CHECK: ret Index: 
test/OpenMP/distribute_parallel_for_simd_codegen.cpp =================================================================== --- test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -406,18 +406,16 @@ a[i] = b[i] + c[i]; // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca + // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca - // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, - // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] - // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: // check EUB for distribute // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] // LAMBDA-DAG: [[EUB_TRUE]]: @@ -436,18 +434,9 @@ // check exit condition // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]], // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - - // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: - // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] - - // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: - // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], - // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], - // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] - // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] + // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] // check that PrevLB and PrevUB are passed to the 'for' // LAMBDA: [[DIST_INNER_LOOP_BODY]]: @@ -466,13 +455,6 @@ // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] - - // LAMBDA: [[DIST_INNER_LOOP_END]]: - // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] - - // LAMBDA: [[DIST_OUTER_LOOP_INC]]: - // check NextLB and NextUB // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] @@ -481,10 +463,31 @@ // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], - // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] - // outer loop exit - // LAMBDA: [[DIST_OUTER_LOOP_END]]: + // Update UB + // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], + // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]], + // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt 
{{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] + // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] + // LAMBDA-DAG: [[EUB_TRUE_1]]: + // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], + // LAMBDA: br label %[[EUB_END_1:.+]] + // LAMBDA-DAG: [[EUB_FALSE_1]]: + // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: br label %[[EUB_END_1]] + // LAMBDA-DAG: [[EUB_END_1]]: + // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] + // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], + + // Store LB in IV + // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], + // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], + + // LAMBDA: [[DIST_INNER_LOOP_END]]: + // LAMBDA: br label %[[LOOP_EXIT:.+]] + + // loop exit + // LAMBDA: [[LOOP_EXIT]]: // LAMBDA-DAG: call void @__kmpc_for_static_fini( // LAMBDA: ret @@ -1154,18 +1157,17 @@ a[i] = b[i] + c[i]; // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca + // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, - // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] - // CHECK: [[DIST_OUTER_LOOP_HEADER]]: // check EUB for distribute // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], - // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, + // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] // CHECK-DAG: [[EUB_TRUE]]: @@ -1184,18 +1186,9 @@ // check exit condition // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]], // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] - // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - - // CHECK: [[DIST_OUTER_LOOP_BODY]]: - // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] - - // CHECK: [[DIST_INNER_LOOP_HEADER]]: - // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], - // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], - // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] - // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] + // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] // check that PrevLB and PrevUB are passed to the 'for' // CHECK: [[DIST_INNER_LOOP_BODY]]: @@ -1214,13 +1207,6 @@ // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], - // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] - - // CHECK: [[DIST_INNER_LOOP_END]]: - // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] - - // CHECK: [[DIST_OUTER_LOOP_INC]]: - // check NextLB and NextUB // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], // CHECK-DAG: 
[[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] @@ -1229,10 +1215,31 @@ // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], - // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] - // outer loop exit - // CHECK: [[DIST_OUTER_LOOP_END]]: + // Update UB + // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], + // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]], + // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] + // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] + // CHECK-DAG: [[EUB_TRUE_1]]: + // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], + // CHECK: br label %[[EUB_END_1:.+]] + // CHECK-DAG: [[EUB_FALSE_1]]: + // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: br label %[[EUB_END_1]] + // CHECK-DAG: [[EUB_END_1]]: + // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] + // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], + + // Store LB in IV + // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], + // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], + + // CHECK: [[DIST_INNER_LOOP_END]]: + // CHECK: br label %[[LOOP_EXIT:.+]] + + // loop exit + // CHECK: [[LOOP_EXIT]]: // CHECK-DAG: call void @__kmpc_for_static_fini( // CHECK: ret @@ -1867,18 +1874,17 @@ // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca +// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, -// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] -// CHECK: [[DIST_OUTER_LOOP_HEADER]]: // check EUB for distribute // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], -// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, +// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] // CHECK-DAG: [[EUB_TRUE]]: @@ -1897,18 +1903,9 @@ // check exit condition // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], +// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]], // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] -// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] - -// CHECK: [[DIST_OUTER_LOOP_BODY]]: -// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] - -// CHECK: [[DIST_INNER_LOOP_HEADER]]: -// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], -// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], -// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] -// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] +// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label 
%[[DIST_INNER_LOOP_END:.+]] // check that PrevLB and PrevUB are passed to the 'for' // CHECK: [[DIST_INNER_LOOP_BODY]]: @@ -1927,13 +1924,6 @@ // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], -// CHECK: br label %[[DIST_INNER_LOOP_HEADER]] - -// CHECK: [[DIST_INNER_LOOP_END]]: -// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] - -// CHECK: [[DIST_OUTER_LOOP_INC]]: -// check NextLB and NextUB // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] @@ -1942,10 +1932,31 @@ // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], -// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] -// outer loop exit -// CHECK: [[DIST_OUTER_LOOP_END]]: +// Update UB +// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], +// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]], +// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] +// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] +// CHECK-DAG: [[EUB_TRUE_1]]: +// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]], +// CHECK: br label %[[EUB_END_1:.+]] +// CHECK-DAG: [[EUB_FALSE_1]]: +// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: br label %[[EUB_END_1]] +// CHECK-DAG: [[EUB_END_1]]: +// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] +// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], + +// Store LB in IV +// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], +// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], + +// CHECK: [[DIST_INNER_LOOP_END]]: +// CHECK: br label %[[LOOP_EXIT:.+]] + +// loop exit +// CHECK: [[LOOP_EXIT]]: // CHECK-DAG: call void @__kmpc_for_static_fini( // CHECK: ret
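
For reference, a minimal source pattern of the kind exercised by the tests above is sketched below: a combined 'distribute parallel for' with a chunked dist_schedule. The function and array names are illustrative only and are not part of this patch; with this change such a construct can be lowered to a single distribute loop guarded by the new CombinedDistCond expression (and, when the inner 'for' schedule is static with a chunk size of 1, by CombinedParForInDistCond) instead of the previous outer/inner loop nest.

// Illustrative sketch only; names and the chunk size are hypothetical.
void vadd(float *a, const float *b, const float *c, int n) {
#pragma omp target teams
#pragma omp distribute parallel for dist_schedule(static, 128)
  for (int i = 0; i < n; ++i)
    a[i] = b[i] + c[i];
}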