Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1180,6 +1180,15 @@ /// Try to apply information from loop guards for \p L to \p Expr. const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is + /// true in given Context. If Context is nullptr, then the found predicate is + /// true everywhere. + bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, + const SCEV *FoundRHS, + const Instruction *Context = nullptr); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1779,15 +1788,6 @@ const Instruction *Context); /// Test whether the condition described by Pred, LHS, and RHS is true - /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is - /// true in given Context. If Context is nullptr, then the found predicate is - /// true everywhere. - bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, - const SCEV *FoundRHS, - const Instruction *Context = nullptr); - - /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true in given Context. If Context is nullptr, then the found predicate is /// true everywhere. Index: llvm/include/llvm/Transforms/Scalar/InductiveUnrolling.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/InductiveUnrolling.h @@ -0,0 +1,19 @@ +#ifndef LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H +#define LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Loop; +class LPMUpdater; + +class InductiveUnrollingPass : public PassInfoMixin { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -154,6 +154,7 @@ #include "llvm/Transforms/Scalar/IVUsersPrinter.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InductiveUnrolling.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/JumpThreading.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -399,6 +399,7 @@ LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) +LOOP_PASS("inductive-unroll", InductiveUnrollingPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -19,6 +19,7 @@ GVNSink.cpp IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp + InductiveUnrolling.cpp IndVarSimplify.cpp InferAddressSpaces.cpp InstSimplifyPass.cpp Index: llvm/lib/Transforms/Scalar/InductiveUnrolling.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/InductiveUnrolling.cpp @@ -0,0 +1,284 @@ +#include "llvm/Transforms/Scalar/InductiveUnrolling.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" + +using namespace llvm; + +#define DEBUG_TYPE "inductive-unroll" + +struct InductiveCheck { + InductiveCheck(BranchInst *Br, ICmpInst::Predicate Pred, PHINode *LHS, + Value *RHS, const SCEV *Step) + : Br(Br), Pred(Pred), LHS(LHS), RHS(RHS), Step(Step) {} + BranchInst *Br; + ICmpInst::Predicate Pred; + PHINode *LHS; + Value *RHS; + const SCEV *Step; +}; + +static bool skipLoop(const Loop &L, ScalarEvolution &SE) { + if (!L.isInnermost()) + return true; + if (!L.getLoopLatch() || !L.getLoopPreheader()) + return true; + auto *ExitCount = SE.getBackedgeTakenCount(&L); + if (isa(ExitCount)) + return true; + // TODO: support switches. + for (auto *BB : L.getBlocks()) + if (!isa(BB->getTerminator())) + return true; + return false; +} + +static bool collectCandidates(SmallVectorImpl &Checks, Loop &L, + ScalarEvolution &SE, DominatorTree &DT) { + assert(Checks.empty() && "Supposed to be an empty vector!"); + if (!L.isInnermost()) + return false; + BasicBlock *Latch = L.getLoopLatch(); + assert(Latch && "Should have a latch!"); + + for (BasicBlock *BB : L.getBlocks()) { + if (!DT.dominates(BB, Latch)) + continue; + using namespace PatternMatch; + ICmpInst::Predicate Pred; + Value *LHS, *RHS; + BasicBlock *IfTrue, *IfFalse; + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), + m_BasicBlock(IfTrue), m_BasicBlock()))) + continue; + if (!isa(LHS)) + continue; + if (!L.contains(IfTrue) || L.contains(IfFalse)) + continue; + if (!L.isLoopInvariant(RHS)) + continue; + // TODO: Support case with inductive RHS. + auto *LHSS = dyn_cast(SE.getSCEV(LHS)); + if (!LHSS || !LHSS->isAffine() || LHSS->getLoop() != &L) + continue; + const SCEV *Step = LHSS->getOperand(1); + const SCEV *Next = SE.getAddExpr(LHSS, Step); + auto *Br = cast(BB->getTerminator()); + auto *RHSS = SE.getSCEV(RHS); + if (!SE.isImpliedCond(Pred, LHSS, RHSS, Pred, Next, RHSS, Br)) + continue; + Checks.push_back(InductiveCheck(Br, Pred, cast(LHS), RHS, Step)); + } + return !Checks.empty(); +} + +static Loop *cloneLoop(SmallVectorImpl &Checks, Loop &L, + LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, + AssumptionCache &AC, const TargetTransformInfo &TTI) { + SmallVector NewBlocks; + DenseMap BlockMap; + ValueToValueMapTy VMap; + + LoopBlocksRPO RPOT(&L); + RPOT.perform(&LI); + Loop *NewL = LI.AllocateLoop(); + Loop *ParentL = L.getParentLoop(); + if (ParentL) + ParentL->addChildLoop(NewL); + else + LI.addTopLevelLoop(NewL); + for (auto *BB : RPOT) { + auto *NewBB = CloneBasicBlock(BB, VMap, ".fallback", BB->getParent()); + BlockMap[BB] = NewBB; + NewBlocks.push_back(NewBB); + NewL->addBasicBlockToLoop(NewBB, LI); + } + + // Add new inputs to exit blocks. + SmallVector ExitBlocks; + L.getExitBlocks(ExitBlocks); + for (auto *ExitBB : ExitBlocks) { + SmallVector Preds; + for (auto *Pred : predecessors(ExitBB)) + Preds.push_back(Pred); + for (auto *Pred : Preds) { + if (!L.contains(Pred)) + continue; + for (auto &Insn : *ExitBB) { + auto *PN = dyn_cast(&Insn); + if (!PN) + break; + Value *V = PN->getIncomingValueForBlock(Pred); + if (VMap.count(V)) + V = VMap[V]; + PN->addIncoming(V, BlockMap[Pred]); + } + } + } + + // Fixup in-loop blocks. + for (auto *BB : NewBlocks) { + auto *BI = cast(BB->getTerminator()); + for (size_t I = 0; I < BI->getNumSuccessors(); I++) { + auto *Succ = BI->getSuccessor(I); + if (L.contains(Succ)) + // For loop blocks, bind the successors. + BI->setSuccessor(I, BlockMap[Succ]); + } + + for (auto &Insn : *BB) + for (size_t I = 0; I < Insn.getNumOperands(); I++) + if (VMap.count(Insn.getOperand(I))) + Insn.setOperand(I, VMap[Insn.getOperand(I)]); + + for (auto &PN : BB->phis()) + for (size_t I = 0; I < PN.getNumIncomingValues(); I++) + if (L.contains(PN.getIncomingBlock(I))) + PN.setIncomingBlock(I, BlockMap[PN.getIncomingBlock(I)]); + } + + // Channel the preheader. + auto *Preheader = L.getLoopPreheader(); + Function *F = Preheader->getParent(); + auto &Context = Preheader->getContext(); + BasicBlock *NewPreheader = + BasicBlock::Create(Context, "preheader.fallback", F); + if (ParentL) + ParentL->addBasicBlockToLoop(NewPreheader, LI); + IRBuilder<> Builder(NewPreheader); + BasicBlock *NewHeader = BlockMap[L.getHeader()]; + + for (auto &Check : Checks) { + auto *Br = Check.Br; + assert(L.contains(Br->getSuccessor(0)) && "Bad successor!"); + Br->setSuccessor(1, NewPreheader); + } + for (auto &Insn : *L.getHeader()) { + auto *PN = dyn_cast(&Insn); + if (!PN) + break; + auto *NewPN = cast(VMap[PN]); + for (size_t I = 0; I < NewPN->getNumIncomingValues(); I++) + if (NewPN->getIncomingBlock(I) == Preheader) { + auto *LCSSA = Builder.CreatePHI(PN->getType(), Checks.size(), + PN->getName() + ".lcssa"); + for (auto &Check : Checks) { + auto *Br = Check.Br; + LCSSA->addIncoming(PN, Br->getParent()); + } + NewPN->setIncomingValue(I, LCSSA); + NewPN->setIncomingBlock(I, NewPreheader); + break; + } + } + + Builder.CreateBr(NewHeader); + DT.recalculate(*F); + assert(DT.verify() && "Bad dom tree!"); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + LI.verify(DT); + + return NewL; +} + +static bool removeChecks(Loop &L, LoopInfo &LI, ScalarEvolution &SE, + DominatorTree &DT, AssumptionCache &AC, + const TargetTransformInfo &TTI) { + if (skipLoop(L, SE)) + return false; + SmallVector Checks; + if (!collectCandidates(Checks, L, SE, DT)) + return false; + + // FIXME: So far, handle only 1 check. + if (Checks.size() != 1) + return false; + + Loop *NewL = cloneLoop(Checks, L, LI, SE, DT, AC, TTI); + + // TODO: Dynamically adjust unroll factor. + const size_t UnrollFactor = 4; + UnrollLoopOptions ULO; + ULO.Count = UnrollFactor; + ULO.TripCount = 0; + ULO.Force = true; + ULO.AllowRuntime = true; + ULO.AllowExpensiveTripCount = true; + ULO.PreserveCondBr = true; + ULO.PreserveOnlyFirst = false; + ULO.TripMultiple = 1; + ULO.PeelCount = 0; + ULO.UnrollRemainder = false; + ULO.ForgetAllSCEV = true; + + UnrollLoop(&L, ULO, &LI, &SE, &DT, &AC, &TTI, + /*OptimizationRemarkEmitter*/ nullptr, /*PreserveLCSSA*/ true); + + assert(DT.verify() && "Bad dom tree!"); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + LI.verify(DT); + + SmallVector ToRemove; + BranchInst *LastCheck = nullptr; + BasicBlock *NewPH = NewL->getLoopPreheader(); + for (auto *BB : predecessors(NewPH)) { + assert(L.contains(BB) && "Not a loop block?"); + auto *Check = cast(BB->getTerminator()); + assert(Check->isConditional() && "Should be conditional!"); + if (!LastCheck || DT.dominates(LastCheck, Check)) + LastCheck = Check; + ToRemove.push_back(Check); + } + + for (auto *Check : ToRemove) { + LLVM_DEBUG(dbgs() << "Removing check " << *Check << "\n"); + Check->setCondition(ConstantInt::getTrue(Check->getContext())); + } + auto *Header = L.getHeader(); + auto *SplitPoint = Header->getFirstNonPHI(); + auto *CheckedBlock = + Header->splitBasicBlock(SplitPoint, Header->getName() + ".checked"); + L.addBasicBlockToLoop(CheckedBlock, LI); + auto *OldTerm = Header->getTerminator(); + IRBuilder<> Builder(OldTerm); + LLVM_DEBUG(dbgs() << "Updating check in block " << Header->getName() << "\n"); + SCEVExpander Exp(SE, L.getHeader()->getModule()->getDataLayout(), "check"); + auto &Check = Checks[0]; + + auto *Offset = Exp.expandCodeFor( + SE.getMulExpr(SE.getConstant(Check.LHS->getType(), UnrollFactor - 1), + Check.Step), + nullptr, OldTerm); + auto *NewLHS = Builder.CreateAdd(Check.LHS, Offset); + auto *NewRHS = Check.RHS; + auto *NewCond = Builder.CreateICmp(Check.Pred, NewLHS, NewRHS); + Builder.CreateCondBr(NewCond, CheckedBlock, NewPH); + for (auto &PN : NewPH->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(CheckedBlock), Header); + OldTerm->eraseFromParent(); + + Function *F = Header->getParent(); + DT.recalculate(*F); + SE.forgetTopmostLoop(&L); + assert(DT.verify() && "Bad dom tree!"); + LI.verify(DT); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + return true; +} + +PreservedAnalyses InductiveUnrollingPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &LU) { + if (removeChecks(L, AR.LI, AR.SE, AR.DT, AR.AC, AR.TTI)) + return getLoopPassPreservedAnalyses(); + return PreservedAnalyses::all(); +} Index: llvm/test/Transforms/InductiveUnroll/motivation.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InductiveUnroll/motivation.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=inductive-unroll %s | FileCheck %s + +target datalayout = "n8:16:32:64" + +define i32 @test_01(i32 %end, i32 %len) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[END_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_3]], [[BACKEDGE_3]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i32 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i32 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp slt i32 [[IV_FALLBACK]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[BACKEDGE_FALLBACK]], label [[FAILURE:%.*]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add nsw i32 [[IV_FALLBACK]], 1 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp slt i32 [[IV_NEXT_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[DONE]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i32 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: [[RC_1:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.1: +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i32 [[IV_NEXT]], 1 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp slt i32 [[IV_NEXT_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[DONE]] +; CHECK: loop.2: +; CHECK-NEXT: [[RC_2:%.*]] = icmp slt i32 [[IV_NEXT_1]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.2: +; CHECK-NEXT: [[IV_NEXT_2]] = add nuw nsw i32 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp slt i32 [[IV_NEXT_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[DONE]] +; CHECK: loop.3: +; CHECK-NEXT: [[RC_3:%.*]] = icmp slt i32 [[IV_NEXT_2]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.3: +; CHECK-NEXT: [[IV_NEXT_3]] = add nsw i32 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp slt i32 [[IV_NEXT_3]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[DONE]] +; +entry: + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %rc = icmp slt i32 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %iv.next = add nsw i32 %iv, 1 + %end.cond = icmp slt i32 %iv.next, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i32 [ %iv.next, %backedge ] + ret i32 %iv.next.lcssa + +failure: ; preds = %loop + ret i32 -1 +} + +declare void @side_effect() + +define i32 @test_02(i32 %end, i32 %len) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[END_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_3]], [[BACKEDGE_3]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i32 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i32 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp slt i32 [[IV_FALLBACK]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[BACKEDGE_FALLBACK]], label [[FAILURE:%.*]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add nsw i32 [[IV_FALLBACK]], 1 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp slt i32 [[IV_NEXT_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[DONE]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i32 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[RC_1:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.1: +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i32 [[IV_NEXT]], 1 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp slt i32 [[IV_NEXT_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[DONE]] +; CHECK: loop.2: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[RC_2:%.*]] = icmp slt i32 [[IV_NEXT_1]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.2: +; CHECK-NEXT: [[IV_NEXT_2]] = add nuw nsw i32 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp slt i32 [[IV_NEXT_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[DONE]] +; CHECK: loop.3: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[RC_3:%.*]] = icmp slt i32 [[IV_NEXT_2]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.3: +; CHECK-NEXT: [[IV_NEXT_3]] = add nsw i32 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp slt i32 [[IV_NEXT_3]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[DONE]] +; +entry: + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + call void @side_effect() + %rc = icmp slt i32 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %iv.next = add nsw i32 %iv, 1 + %end.cond = icmp slt i32 %iv.next, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i32 [ %iv.next, %backedge ] + ret i32 %iv.next.lcssa + +failure: ; preds = %loop + ret i32 -1 +}