Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -392,9 +392,11 @@
     CombinedConditionOffset = 25,
     CombinedNextLowerBoundOffset = 26,
     CombinedNextUpperBoundOffset = 27,
+    CombinedDistConditionOffset = 28,
+    CombinedParForInDistConditionOffset = 29,
     // Offset to the end (and start of the following counters/updates/finals
     // arrays) for combined distribute loop directives.
-    CombinedDistributeEnd = 28,
+    CombinedDistributeEnd = 30,
   };

   /// Get the counters storage.
@@ -605,6 +607,17 @@
            "expected loop bound sharing directive");
     *std::next(child_begin(), CombinedNextUpperBoundOffset) = CombNUB;
   }
+  void setCombinedDistCond(Expr *CombDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(), CombinedDistConditionOffset) = CombDistCond;
+  }
+  void setCombinedParForInDistCond(Expr *CombParForInDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(),
+               CombinedParForInDistConditionOffset) = CombParForInDistCond;
+  }
   void setCounters(ArrayRef<Expr *> A);
   void setPrivateCounters(ArrayRef<Expr *> A);
   void setInits(ArrayRef<Expr *> A);
@@ -637,6 +650,13 @@
     /// Update of UpperBound for statically scheduled omp loops for
     /// outer loop in combined constructs (e.g. 'distribute parallel for')
     Expr *NUB;
+    /// Distribute loop condition used when composing 'omp distribute'
+    /// with 'omp for' in the same construct when the schedule is chunked.
+    Expr *DistCond;
+    /// 'omp parallel for' loop condition used when composed with
+    /// 'omp distribute' in the same construct and when the schedule is
+    /// chunked and the chunk size is 1.
+    Expr *ParForInDistCond;
   };

   /// The expressions built for the OpenMP loop CodeGen for the
@@ -754,6 +774,8 @@
       DistCombinedFields.Cond = nullptr;
       DistCombinedFields.NLB = nullptr;
       DistCombinedFields.NUB = nullptr;
+      DistCombinedFields.DistCond = nullptr;
+      DistCombinedFields.ParForInDistCond = nullptr;
     }
   };

@@ -922,6 +944,18 @@
     return const_cast<Expr *>(reinterpret_cast<const Expr *>(
         *std::next(child_begin(), CombinedNextUpperBoundOffset)));
   }
+  Expr *getCombinedDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedDistConditionOffset)));
+  }
+  Expr *getCombinedParForInDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedParForInDistConditionOffset)));
+  }
   const Stmt *getBody() const {
     // This relies on the loop form is already checked by Sema.
     const Stmt *Body =
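For orientation, this is the kind of source construct the two new expressions serve; a minimal, hypothetical example (function and array names are invented, not taken from the patch):

    // With schedule(static, 1) each 'distribute' chunk covers a single
    // iteration, so the outer loop compares IV against the global upper
    // bound (getCombinedDistCond) while the inner 'parallel for' compares
    // IV against its chunk's upper bound (getCombinedParForInDistCond).
    void vadd(int n, float *a, float *b, float *c) {
    #pragma omp target teams
    #pragma omp distribute parallel for schedule(static, 1)
      for (int i = 0; i < n; ++i)
        a[i] = b[i] + c[i];
    }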
Index: lib/AST/StmtOpenMP.cpp
===================================================================
--- lib/AST/StmtOpenMP.cpp
+++ lib/AST/StmtOpenMP.cpp
@@ -1079,6 +1079,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1145,6 +1147,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }

@@ -1457,6 +1461,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }

@@ -1524,6 +1530,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }

@@ -1670,6 +1678,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }

@@ -1741,6 +1751,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }

Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -890,6 +890,20 @@
   virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
                                   bool Chunked) const;

+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
   /// Check if the specified \a ScheduleKind is dynamic.
   /// This kind of worksharing directive is emitted without outer loop.
   /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1506,7 +1520,7 @@
   /// schedule clause.
   virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      llvm::Value *&Chunk) const {}
+      const Expr *&ChunkExpr) const {}

   /// Emits call of the outlined function with the provided arguments,
   /// translating these arguments to correct target-specific arguments.
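Handing the chunk out as a const Expr * instead of an already-emitted llvm::Value * is what enables the new fast path: the caller can still emit the expression as a runtime value, but it can now also try to fold it to a compile-time constant first. A minimal sketch of the idea (the helper name is invented; EvaluateAsInt is the real clang::Expr API this patch uses):

    // Sketch only: detect a compile-time chunk size of 1 before choosing
    // which loop shape to emit. Illustration, not part of the patch.
    static bool isChunkSizeOne(const Expr *ChunkExpr, const ASTContext &Ctx) {
      llvm::APSInt EvaluatedChunk;
      if (ChunkExpr && ChunkExpr->EvaluateAsInt(EvaluatedChunk, Ctx))
        return EvaluatedChunk.getLimitedValue() == 1;
      return false; // A non-constant chunk cannot take the chunk-one path.
    }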
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3292,6 +3292,18 @@
   return Schedule == OMP_dist_sch_static;
 }

+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                                      bool Chunked) const {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+  return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  return Schedule == OMP_dist_sch_static_chunked;
+}
+
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
   OpenMPSchedType Schedule =

Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -348,7 +348,7 @@
   /// Choose a default value for the schedule clause.
   void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      llvm::Value *&Chunk) const override;
+      const Expr *&ChunkExpr) const override;

 private:
   /// Track the execution mode when codegening directives within a target

Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4247,8 +4247,11 @@
 void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
     CodeGenFunction &CGF, const OMPLoopDirective &S,
     OpenMPScheduleClauseKind &ScheduleKind,
-    llvm::Value *&Chunk) const {
+    const Expr *&ChunkExpr) const {
   ScheduleKind = OMPC_SCHEDULE_static;
-  Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize(
-      S.getIterationVariable()->getType()), 1);
+  // Chunk size is 1 in this case.
+  llvm::APInt ChunkSize(32, 1);
+  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
+      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+      SourceLocation());
 }
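For reading the FileCheck updates below: the 'i32 91' argument in the __kmpc_for_static_init_4 calls is the runtime schedule type. Assuming the usual OpenMPSchedType values from CGOpenMPRuntime.cpp (listed from memory, not from this patch; worth double-checking against the enum):

    OMP_sch_static_chunked      = 33,  // schedule(static, <chunk>)
    OMP_sch_static              = 34,  // schedule(static)
    OMP_dist_sch_static_chunked = 91,  // dist_schedule(static, <chunk>)
    OMP_dist_sch_static         = 92,  // dist_schedule(static)

So the tests keep initializing with 91: the distribute schedule is still static chunked; what changes is the loop structure emitted around the runtime call.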
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2006,7 +2006,7 @@
   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
   // for combined 'distribute' and 'for' the increment expression of distribute
-  // is store in DistInc. For 'distribute' alone, it is in Inc.
+  // is stored in DistInc. For 'distribute' alone, it is in Inc.
   Expr *IncExpr;
   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
     IncExpr = S.getDistInc();
@@ -2298,22 +2298,28 @@
     (void)LoopScope.Privatize();

     // Detect the loop schedule kind and chunk.
-    llvm::Value *Chunk = nullptr;
+    const Expr *ChunkExpr = nullptr;
     OpenMPScheduleTy ScheduleKind;
     if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
       ScheduleKind.Schedule = C->getScheduleKind();
       ScheduleKind.M1 = C->getFirstScheduleModifier();
       ScheduleKind.M2 = C->getSecondScheduleModifier();
-      if (const Expr *Ch = C->getChunkSize()) {
-        Chunk = EmitScalarExpr(Ch);
-        Chunk = EmitScalarConversion(Chunk, Ch->getType(),
-                                     S.getIterationVariable()->getType(),
-                                     S.getBeginLoc());
-      }
+      ChunkExpr = C->getChunkSize();
     } else {
       // Default behaviour for schedule clause.
       CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
-          *this, S, ScheduleKind.Schedule, Chunk);
+          *this, S, ScheduleKind.Schedule, ChunkExpr);
+    }
+    bool HasChunkSizeOne = false;
+    llvm::Value *Chunk = nullptr;
+    if (ChunkExpr) {
+      Chunk = EmitScalarExpr(ChunkExpr);
+      Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+                                   S.getIterationVariable()->getType(),
+                                   S.getBeginLoc());
+      llvm::APSInt EvaluatedChunk;
+      if (ChunkExpr->EvaluateAsInt(EvaluatedChunk, getContext()))
+        HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
     }
     const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
     const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -2321,8 +2327,12 @@
     // If the static schedule kind is specified or if the ordered clause is
     // specified, and if no monotonic modifier is specified, the effect will
     // be as if the monotonic modifier was specified.
-    if (RT.isStaticNonchunked(ScheduleKind.Schedule,
-                              /* Chunked */ Chunk != nullptr) &&
+    bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
+        /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
+        isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+    if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
+                               /* Chunked */ Chunk != nullptr) ||
+         StaticChunkedOne) &&
         !Ordered) {
       if (isOpenMPSimdDirective(S.getDirectiveKind()))
         EmitOMPSimdInit(S, /*IsMonotonic=*/true);
@@ -2333,23 +2343,38 @@
       // unspecified in this case.
       CGOpenMPRuntime::StaticRTInput StaticInit(
           IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
-          UB.getAddress(), ST.getAddress());
+          UB.getAddress(), ST.getAddress(),
+          StaticChunkedOne ? Chunk : nullptr);
       RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                            ScheduleKind, StaticInit);
       JumpDest LoopExit =
           getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
       // UB = min(UB, GlobalUB);
-      EmitIgnoredExpr(S.getEnsureUpperBound());
+      if (!StaticChunkedOne)
+        EmitIgnoredExpr(S.getEnsureUpperBound());
       // IV = LB;
       EmitIgnoredExpr(S.getInit());
-      // while (idx <= UB) { BODY; ++idx; }
-      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
-                       S.getInc(),
-                       [&S, LoopExit](CodeGenFunction &CGF) {
-                         CGF.EmitOMPLoopBody(S, LoopExit);
-                         CGF.EmitStopPoint(&S);
-                       },
-                       [](CodeGenFunction &) {});
+      // For unchunked static schedule generate:
+      //
+      // while (idx <= UB) {
+      //   BODY;
+      //   ++idx;
+      // }
+      //
+      // For static schedule with chunk one:
+      //
+      // while (IV <= PrevUB) {
+      //   BODY;
+      //   IV += ST;
+      // }
+      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+          StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
+          StaticChunkedOne ? S.getDistInc() : S.getInc(),
+          [&S, LoopExit](CodeGenFunction &CGF) {
+            CGF.EmitOMPLoopBody(S, LoopExit);
+            CGF.EmitStopPoint(&S);
+          },
+          [](CodeGenFunction &) {});
       EmitBlock(LoopExit.getBlock());
       // Tell the runtime we are done.
       auto &&CodeGen = [&S](CodeGenFunction &CGF) {
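Spelled out with descriptive names rather than the actual IR temporaries (a paraphrase of the comment above, not authoritative): for schedule(static, 1) inside a combined construct, the worksharing loop skips the UB re-clamp and walks the IV by the stride against the bounds its enclosing 'distribute' chunk provides:

    // Sketch of the emitted worksharing loop when StaticChunkedOne is true.
    iv = lb;                 // S.getInit()
    while (iv <= prev_ub) {  // S.getCombinedParForInDistCond()
      body(iv);
      iv += stride;          // S.getDistInc()
    }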
@@ -3345,13 +3370,18 @@
       // iteration space is divided into chunks that are approximately equal
       // in size, and at most one chunk is distributed to each team of the
       // league. The size of the chunks is unspecified in this case.
+      bool StaticChunked = RT.isStaticChunked(
+          ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
       if (RT.isStaticNonchunked(ScheduleKind,
-                                /* Chunked */ Chunk != nullptr)) {
+                                /* Chunked */ Chunk != nullptr) ||
+          StaticChunked) {
         if (isOpenMPSimdDirective(S.getDirectiveKind()))
           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
         CGOpenMPRuntime::StaticRTInput StaticInit(
             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
-            LB.getAddress(), UB.getAddress(), ST.getAddress());
+            LB.getAddress(), UB.getAddress(), ST.getAddress(),
+            StaticChunked ? Chunk : nullptr);
         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                     StaticInit);
         JumpDest LoopExit =
@@ -3370,15 +3400,45 @@
                 ? S.getCombinedCond()
                 : S.getCond();

-        // for distribute alone, codegen
-        // while (idx <= UB) { BODY; ++idx; }
-        // when combined with 'for' (e.g. as in 'distribute parallel for')
-        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+        if (StaticChunked)
+          Cond = S.getCombinedDistCond();
+
+        // For static unchunked schedules generate:
+        //
+        // 1. For distribute alone, codegen
+        //    while (idx <= UB) {
+        //      BODY;
+        //      ++idx;
+        //    }
+        //
+        // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
+        //    while (idx <= UB) {
+        //      <CodeGen rest of pragma>(LB, UB);
+        //      idx += ST;
+        //    }
+        //
+        // For static chunk one schedule generate:
+        //
+        // while (IV <= GlobalUB) {
+        //   <CodeGen rest of pragma>(LB, UB);
+        //   LB += ST;
+        //   UB += ST;
+        //   UB = min(UB, GlobalUB);
+        //   IV = LB;
+        // }
+        //
         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                            CodeGenLoop(CGF, S, LoopExit);
                          },
-                         [](CodeGenFunction &) {});
+                         [&S, StaticChunked](CodeGenFunction &CGF) {
+                           if (StaticChunked) {
+                             CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedInit());
+                           }
+                         });
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
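One subtlety worth calling out: EmitOMPInnerLoop emits its IncExpr in the loop's increment block and then runs the post-increment callback, so in the chunk-one case the per-iteration order of the distribute loop is roughly (my reading of the helper, not a quote of it):

    // cond:  if (!(IV <= GlobalUB)) goto exit;  // CombinedDistCond
    // body:  <CodeGen rest of pragma>(LB, UB);
    // inc:   IV += ST;                          // DistInc
    //        LB += ST; UB += ST;                // CombinedNextLower/UpperBound
    //        UB = min(UB, GlobalUB);            // CombinedEnsureUpperBound
    //        IV = LB;                           // CombinedInit
    //        goto cond;

The IV += ST store is immediately overwritten by IV = LB, which is exactly the sequence the updated FileCheck lines below verify.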
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -354,7 +354,7 @@
       return OMPD_unknown;
     return std::next(Stack.back().first.rbegin())->Directive;
   }
-
+
   /// Add requires decl to internal vector
   void addRequiresDecl(OMPRequiresDecl *RD) {
     RequiresDecls.push_back(RD);
@@ -381,7 +381,7 @@
     }
     return IsDuplicate;
   }
-
+
   /// Set default data sharing attribute to none.
   void setDefaultDSANone(SourceLocation Loc) {
     assert(!isStackEmpty());
@@ -5201,6 +5201,12 @@
           ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
           : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
                                NumIterations.get());
+  ExprResult CombDistCond;
+  if (isOpenMPLoopBoundSharingDirective(DKind)) {
+    CombDistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(),
+                                      NumIterations.get());
+  }
+
   ExprResult CombCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     CombCond =
@@ -5275,7 +5281,7 @@
   // on PrevUB instead of NumIterations - used to implement 'for' when found
   // in combination with 'distribute', like in 'distribute parallel for'
   SourceLocation DistIncLoc = AStmt->getBeginLoc();
-  ExprResult DistCond, DistInc, PrevEUB;
+  ExprResult DistCond, DistInc, PrevEUB, ParForInDistCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
     assert(DistCond.isUsable() && "distribute cond expr was not built");
@@ -5298,6 +5304,11 @@
     PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(),
                                  CondOp.get());
     PrevEUB = SemaRef.ActOnFinishFullExpr(PrevEUB.get());
+
+    // Build IV <= PrevUB, used for 'parallel for' when it is combined with
+    // a distribute directive with schedule(static, 1).
+    ParForInDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), PrevUB.get());
   }

   // Build updates and final values of the loop counters.
@@ -5421,6 +5432,8 @@
   Built.DistCombinedFields.Cond = CombCond.get();
   Built.DistCombinedFields.NLB = CombNextLB.get();
   Built.DistCombinedFields.NUB = CombNextUB.get();
+  Built.DistCombinedFields.DistCond = CombDistCond.get();
+  Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get();

   return NestedLoopCount;
 }

Index: lib/Serialization/ASTReaderStmt.cpp
===================================================================
--- lib/Serialization/ASTReaderStmt.cpp
+++ lib/Serialization/ASTReaderStmt.cpp
@@ -1856,6 +1856,8 @@
     D->setCombinedCond(Record.readSubExpr());
     D->setCombinedNextLowerBound(Record.readSubExpr());
     D->setCombinedNextUpperBound(Record.readSubExpr());
+    D->setCombinedDistCond(Record.readSubExpr());
+    D->setCombinedParForInDistCond(Record.readSubExpr());
   }
   SmallVector<Expr *, 4> Sub;
   unsigned CollapsedNum = D->getCollapsedNumber();

Index: lib/Serialization/ASTWriterStmt.cpp
===================================================================
--- lib/Serialization/ASTWriterStmt.cpp
+++ lib/Serialization/ASTWriterStmt.cpp
@@ -1854,6 +1854,8 @@
     Record.AddStmt(D->getCombinedCond());
     Record.AddStmt(D->getCombinedNextLowerBound());
     Record.AddStmt(D->getCombinedNextUpperBound());
+    Record.AddStmt(D->getCombinedDistCond());
+    Record.AddStmt(D->getCombinedParForInDistCond());
   }
   for (auto I : D->counters()) {
     Record.AddStmt(I);
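A quick recap of the loop conditions Sema now attaches to combined distribute directives (my own summary of the code above, not text from the patch; note the reader and writer must serialize these in the same order):

    // Cond             : IV <= UB               plain worksharing loop
    // DistCond         : IV <= UB               'distribute' outer loop
    // CombCond         : IV < NumIterations     combined construct, unchunked
    // CombDistCond     : IV <= NumIterations    'distribute' with chunked schedule
    // ParForInDistCond : IV <= PrevUB           'parallel for' nested in
    //                                           'distribute' schedule(static, 1)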
Index: test/OpenMP/distribute_parallel_for_codegen.cpp
===================================================================
--- test/OpenMP/distribute_parallel_for_codegen.cpp
+++ test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -407,18 +407,16 @@
     a[i] = b[i] + c[i];
     // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
     // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
-    // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-    // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+    // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -437,18 +435,10 @@
     // check exit condition
     // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-    // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-    // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-    // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
+    // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+    // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+    // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]

     // check that PrevLB and PrevUB are passed to the 'for'
     // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -467,13 +457,6 @@
     // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // LAMBDA: [[DIST_INNER_LOOP_END]]:
-    // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -482,10 +465,31 @@
     // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
-    // outer loop exit
-    // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+
+    // Update UB
+    // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // LAMBDA-DAG: [[EUB_TRUE_1]]:
+    // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // LAMBDA: br label %[[EUB_END_1:.+]]
+    // LAMBDA-DAG: [[EUB_FALSE_1]]:
+    // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // LAMBDA: br label %[[EUB_END_1]]
+    // LAMBDA-DAG: [[EUB_END_1]]:
+    // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // LAMBDA: [[DIST_INNER_LOOP_END]]:
+    // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // LAMBDA: [[LOOP_EXIT]]:
     // LAMBDA-DAG: call void @__kmpc_for_static_fini(
     // LAMBDA: ret
@@ -1155,18 +1159,17 @@
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1185,18 +1188,10 @@
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]

     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1215,13 +1210,6 @@
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1230,10 +1218,31 @@
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
@@ -1868,18 +1877,17 @@
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1898,18 +1906,10 @@
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]

 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1928,13 +1928,6 @@
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1943,10 +1936,31 @@
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret
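The simd variant below gets the same FileCheck updates. To run just these two tests after rebuilding clang, the usual lit invocation applies (typical usage; adjust the paths to your checkout and build layout):

    % bin/llvm-lit -v tools/clang/test/OpenMP/distribute_parallel_for_codegen.cpp \
                      tools/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp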
Index: test/OpenMP/distribute_parallel_for_simd_codegen.cpp
===================================================================
--- test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -406,18 +406,16 @@
     a[i] = b[i] + c[i];
     // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
     // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
-    // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-    // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+    // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -436,18 +434,10 @@
     // check exit condition
     // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-    // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-    // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-    // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
+    // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+    // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+    // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]

     // check that PrevLB and PrevUB are passed to the 'for'
     // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -466,13 +456,6 @@
     // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // LAMBDA: [[DIST_INNER_LOOP_END]]:
-    // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -481,10 +464,31 @@
     // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
-    // outer loop exit
-    // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+
+    // Update UB
+    // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // LAMBDA-DAG: [[EUB_TRUE_1]]:
+    // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // LAMBDA: br label %[[EUB_END_1:.+]]
+    // LAMBDA-DAG: [[EUB_FALSE_1]]:
+    // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // LAMBDA: br label %[[EUB_END_1]]
+    // LAMBDA-DAG: [[EUB_END_1]]:
+    // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // LAMBDA: [[DIST_INNER_LOOP_END]]:
+    // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // LAMBDA: [[LOOP_EXIT]]:
     // LAMBDA-DAG: call void @__kmpc_for_static_fini(
     // LAMBDA: ret
@@ -1154,18 +1158,17 @@
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1184,18 +1187,10 @@
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]

     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1214,13 +1209,6 @@
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1229,10 +1217,31 @@
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
@@ -1867,18 +1876,17 @@
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1897,18 +1905,10 @@
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]

 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1927,13 +1927,6 @@
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1942,10 +1935,31 @@
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret