Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1180,6 +1180,15 @@ /// Try to apply information from loop guards for \p L to \p Expr. const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is + /// true in given Context. If Context is nullptr, then the found predicate is + /// true everywhere. + bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, + const SCEV *FoundRHS, + const Instruction *Context = nullptr); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1778,15 +1787,6 @@ const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context); - /// Test whether the condition described by Pred, LHS, and RHS is true - /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is - /// true in given Context. If Context is nullptr, then the found predicate is - /// true everywhere. - bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, - const SCEV *FoundRHS, - const Instruction *Context = nullptr); - /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true in given Context. If Context is nullptr, then the found predicate is Index: llvm/include/llvm/Transforms/Scalar/InductiveUnrolling.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/InductiveUnrolling.h @@ -0,0 +1,19 @@ +#ifndef LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H +#define LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Loop; +class LPMUpdater; + +class InductiveUnrollingPass : public PassInfoMixin { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -154,6 +154,7 @@ #include "llvm/Transforms/Scalar/IVUsersPrinter.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InductiveUnrolling.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/JumpThreading.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -399,6 +399,7 @@ LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) +LOOP_PASS("inductive-unroll", InductiveUnrollingPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -19,6 +19,7 @@ GVNSink.cpp IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp + InductiveUnrolling.cpp IndVarSimplify.cpp InferAddressSpaces.cpp InstSimplifyPass.cpp Index: llvm/lib/Transforms/Scalar/InductiveUnrolling.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/InductiveUnrolling.cpp @@ -0,0 +1,281 @@ +#include "llvm/Transforms/Scalar/InductiveUnrolling.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" + +using namespace llvm; + +#define DEBUG_TYPE "inductive-unroll" + +struct InductiveCheck { + InductiveCheck(BranchInst *Br, size_t ExitSucc, const SCEV *ExitCount) + : Br(Br), ExitSucc(ExitSucc), ExitCount(ExitCount) {} + BranchInst *Br; + size_t ExitSucc; + const SCEV *ExitCount; +}; + +static bool skipLoop(const Loop &L, ScalarEvolution &SE) { + if (!L.isInnermost()) + return true; + if (!L.getLoopLatch() || !L.getLoopPreheader()) + return true; + auto *ExitCount = SE.getBackedgeTakenCount(&L); + if (isa(ExitCount)) + return true; + // TODO: support switches. + for (auto *BB : L.getBlocks()) + if (!isa(BB->getTerminator())) + return true; + return false; +} + +static bool collectCandidates(SmallVectorImpl &Checks, Loop &L, + ScalarEvolution &SE, DominatorTree &DT, + const size_t UnrollFactor) { + assert(Checks.empty() && "Supposed to be an empty vector!"); + if (!L.isInnermost()) + return false; + BasicBlock *Latch = L.getLoopLatch(); + assert(Latch && "Should have a latch!"); + + for (BasicBlock *BB : L.getBlocks()) { + if (!DT.dominates(BB, Latch)) + continue; + using namespace PatternMatch; + ICmpInst::Predicate Pred; + BasicBlock *IfTrue, *IfFalse; + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(), m_Value()), m_BasicBlock(IfTrue), + m_BasicBlock(IfFalse)))) + continue; + auto *ExitCount = SE.getExitCount(&L, BB); + if (isa(ExitCount)) + continue; + auto *Br = cast(BB->getTerminator()); + const SCEV *Exceed = SE.getAddExpr( + ExitCount, SE.getConstant(ExitCount->getType(), UnrollFactor)); + if (!SE.isKnownPredicateAt(ICmpInst::ICMP_ULT, ExitCount, Exceed, Br)) + continue; + size_t ExitSucc = L.contains(IfTrue) ? 1 : 0; + Checks.push_back(InductiveCheck(Br, ExitSucc, ExitCount)); + } + return !Checks.empty(); +} + +static Loop *cloneLoop(SmallVectorImpl &Checks, Loop &L, + LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, + AssumptionCache &AC, const TargetTransformInfo &TTI) { + SmallVector NewBlocks; + DenseMap BlockMap; + ValueToValueMapTy VMap; + + LoopBlocksRPO RPOT(&L); + RPOT.perform(&LI); + Loop *NewL = LI.AllocateLoop(); + Loop *ParentL = L.getParentLoop(); + if (ParentL) + ParentL->addChildLoop(NewL); + else + LI.addTopLevelLoop(NewL); + for (auto *BB : RPOT) { + auto *NewBB = CloneBasicBlock(BB, VMap, ".fallback", BB->getParent()); + BlockMap[BB] = NewBB; + NewBlocks.push_back(NewBB); + NewL->addBasicBlockToLoop(NewBB, LI); + } + + // Add new inputs to exit blocks. + for (auto &It : BlockMap) { + BasicBlock *OldLB = It.first; + BasicBlock *NewLB = It.second; + for (auto *Succ : successors(OldLB)) + if (!L.contains(Succ)) + for (auto &PN : Succ->phis()) { + Value *Inc = PN.getIncomingValueForBlock(OldLB); + if (VMap.count(Inc)) + Inc = VMap[Inc]; + PN.addIncoming(Inc, NewLB); + } + } + + // Fixup in-loop blocks. + for (auto *BB : NewBlocks) { + auto *BI = cast(BB->getTerminator()); + for (size_t I = 0; I < BI->getNumSuccessors(); I++) { + auto *Succ = BI->getSuccessor(I); + if (L.contains(Succ)) + // For loop blocks, bind the successors. + BI->setSuccessor(I, BlockMap[Succ]); + } + + for (auto &Insn : *BB) + for (size_t I = 0; I < Insn.getNumOperands(); I++) + if (VMap.count(Insn.getOperand(I))) + Insn.setOperand(I, VMap[Insn.getOperand(I)]); + + for (auto &PN : BB->phis()) + for (size_t I = 0; I < PN.getNumIncomingValues(); I++) + if (L.contains(PN.getIncomingBlock(I))) + PN.setIncomingBlock(I, BlockMap[PN.getIncomingBlock(I)]); + } + + // Channel the preheader. + auto *Preheader = L.getLoopPreheader(); + Function *F = Preheader->getParent(); + auto &Context = Preheader->getContext(); + BasicBlock *NewPreheader = + BasicBlock::Create(Context, "preheader.fallback", F); + if (ParentL) + ParentL->addBasicBlockToLoop(NewPreheader, LI); + IRBuilder<> Builder(NewPreheader); + BasicBlock *NewHeader = BlockMap[L.getHeader()]; + + for (auto &Check : Checks) { + auto *Br = Check.Br; + assert(L.contains(Br->getSuccessor(1 - Check.ExitSucc)) && + "Bad successor!"); + for (auto &PN : Br->getSuccessor(Check.ExitSucc)->phis()) + PN.removeIncomingValue(Br->getParent()); + Br->setSuccessor(Check.ExitSucc, NewPreheader); + } + for (auto &Insn : *L.getHeader()) { + auto *PN = dyn_cast(&Insn); + if (!PN) + break; + auto *NewPN = cast(VMap[PN]); + for (size_t I = 0; I < NewPN->getNumIncomingValues(); I++) + if (NewPN->getIncomingBlock(I) == Preheader) { + auto *LCSSA = Builder.CreatePHI(PN->getType(), Checks.size(), + PN->getName() + ".lcssa"); + for (auto &Check : Checks) { + auto *Br = Check.Br; + LCSSA->addIncoming(PN, Br->getParent()); + } + NewPN->setIncomingValue(I, LCSSA); + NewPN->setIncomingBlock(I, NewPreheader); + break; + } + } + + Builder.CreateBr(NewHeader); + DT.recalculate(*F); + assert(DT.verify() && "Bad dom tree!"); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + LI.verify(DT); + + return NewL; +} + +static bool removeChecks(Loop &L, LoopInfo &LI, ScalarEvolution &SE, + DominatorTree &DT, AssumptionCache &AC, + const TargetTransformInfo &TTI) { + auto *F = L.getHeader()->getParent(); + assert(!verifyFunction(*F, &dbgs()) && "Bad in the beginning!"); + if (skipLoop(L, SE)) + return false; + const size_t UnrollFactor = 4; + SmallVector Checks; + if (!collectCandidates(Checks, L, SE, DT, UnrollFactor)) + return false; + + // FIXME: So far, handle only 1 check. + if (Checks.size() != 1) + return false; + + assert(!verifyFunction(*F, &dbgs()) && "Bad after candidate collection!"); + + Loop *NewL = cloneLoop(Checks, L, LI, SE, DT, AC, TTI); + assert(!verifyFunction(*F, &dbgs()) && "Bad after cloning!"); + + // TODO: Dynamically adjust unroll factor. + UnrollLoopOptions ULO; + ULO.Count = UnrollFactor; + ULO.TripCount = 0; + ULO.Force = true; + ULO.AllowRuntime = false; + ULO.AllowExpensiveTripCount = true; + ULO.PreserveCondBr = true; + ULO.PreserveOnlyFirst = false; + ULO.TripMultiple = 1; + ULO.PeelCount = 0; + ULO.UnrollRemainder = false; + ULO.ForgetAllSCEV = true; + + UnrollLoop(&L, ULO, &LI, &SE, &DT, &AC, &TTI, + /*OptimizationRemarkEmitter*/ nullptr, /*PreserveLCSSA*/ true); + + assert(!verifyFunction(*F, &dbgs()) && "Bad after unroll!"); + + assert(DT.verify() && "Bad dom tree!"); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + LI.verify(DT); + + SmallVector ToRemove; + BasicBlock *NewPH = NewL->getLoopPreheader(); + BasicBlock *FirstCheck = nullptr; + for (auto *BB : predecessors(NewPH)) { + assert(L.contains(BB) && "Not a loop block?"); + auto *Br = cast(BB->getTerminator()); + assert(Br->isConditional() && "Should be conditional!"); + ToRemove.push_back(Br); + if (!FirstCheck || DT.dominates(BB, FirstCheck)) + FirstCheck = BB; + } + + auto &InductiveCheck = Checks[0]; + for (auto *Br : ToRemove) { + LLVM_DEBUG(dbgs() << "Removing check in branch " << *Br << "\n"); + Br->setCondition( + ConstantInt::getBool(Br->getContext(), InductiveCheck.ExitSucc)); + } + auto *Header = L.getHeader(); + + auto *SplitPoint = Header->getFirstNonPHI(); + auto *CheckedBlock = + Header->splitBasicBlock(SplitPoint, Header->getName() + ".checked"); + L.addBasicBlockToLoop(CheckedBlock, LI); + auto *OldTerm = Header->getTerminator(); + IRBuilder<> Builder(OldTerm); + LLVM_DEBUG(dbgs() << "Updating check in block " << Header->getName() << "\n"); + SCEVExpander Exp(SE, L.getHeader()->getModule()->getDataLayout(), "check"); + + Type *IVType = InductiveCheck.ExitCount->getType(); + auto *CanonicalIV = + SE.getAddRecExpr(SE.getConstant(IVType, UnrollFactor - 1), + SE.getConstant(IVType, UnrollFactor), &L, SCEV::FlagNUW); + auto *NewLHS = Exp.expandCodeFor(CanonicalIV, nullptr, OldTerm); + auto *NewRHS = Exp.expandCodeFor(InductiveCheck.ExitCount, nullptr, OldTerm); + auto *NewCond = Builder.CreateICmp(ICmpInst::ICMP_ULT, NewLHS, NewRHS); + Builder.CreateCondBr(NewCond, CheckedBlock, NewPH); + OldTerm->eraseFromParent(); + for (auto &PN : NewPH->phis()) { + BasicBlock *FirstInputFrom = FirstCheck == Header ? CheckedBlock : FirstCheck; + PN.addIncoming(PN.getIncomingValueForBlock(FirstInputFrom), Header); + } + + DT.recalculate(*F); + SE.forgetTopmostLoop(&L); + assert(DT.verify() && "Bad dom tree!"); + LI.verify(DT); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + assert(!verifyFunction(*F, &dbgs()) && "Bad in the end!"); + return true; +} + +PreservedAnalyses InductiveUnrollingPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &LU) { + if (removeChecks(L, AR.LI, AR.SE, AR.DT, AR.AC, AR.TTI)) + return getLoopPassPreservedAnalyses(); + return PreservedAnalyses::all(); +} Index: llvm/test/Transforms/InductiveUnroll/motivation.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InductiveUnroll/motivation.ll @@ -0,0 +1,627 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=inductive-unroll %s | FileCheck %s + +target datalayout = "n8:16:32:64" + +define i64 @test_01(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BACKEDGE_3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVAR]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], [[LEN]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ult i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_3]], [[BACKEDGE_3]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i64 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add i64 [[IV_FALLBACK]], 1 +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp slt i64 [[IV_FALLBACK]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[BACKEDGE_FALLBACK]], label [[FAILURE:%.*]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[ADDR_FALLBACK:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_FALLBACK]] +; CHECK-NEXT: [[X_FALLBACK:%.*]] = load i32, i32* [[ADDR_FALLBACK]], align 4 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp eq i32 [[X_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[DONE]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i64 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ult i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[DONE]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV_NEXT_2]] = add nuw nsw i64 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[RC_2:%.*]] = icmp ult i64 [[IV_NEXT_1]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.2: +; CHECK-NEXT: [[ADDR_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR_2]], align 4 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp eq i32 [[X_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[DONE]] +; CHECK: loop.3: +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[RC_3:%.*]] = icmp ult i64 [[IV_NEXT_2]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.3: +; CHECK-NEXT: [[ADDR_3:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR_3]], align 4 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp eq i32 [[X_3]], [[END]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[DONE]] +; +entry: + %len = zext i32 %len.short to i64 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i64 %iv, 1 + %rc = icmp slt i64 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +declare void @side_effect() + +define i64 @test_02(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BACKEDGE_3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVAR]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], [[LEN]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ult i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_3]], [[BACKEDGE_3]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i64 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add i64 [[IV_FALLBACK]], 1 +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp slt i64 [[IV_FALLBACK]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[BACKEDGE_FALLBACK]], label [[FAILURE:%.*]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[ADDR_FALLBACK:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_FALLBACK]] +; CHECK-NEXT: [[X_FALLBACK:%.*]] = load i32, i32* [[ADDR_FALLBACK]], align 4 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp eq i32 [[X_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[DONE]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i64 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ult i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[DONE]] +; CHECK: loop.2: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_2]] = add nuw nsw i64 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[RC_2:%.*]] = icmp ult i64 [[IV_NEXT_1]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.2: +; CHECK-NEXT: [[ADDR_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR_2]], align 4 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp eq i32 [[X_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[DONE]] +; CHECK: loop.3: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[RC_3:%.*]] = icmp ult i64 [[IV_NEXT_2]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.3: +; CHECK-NEXT: [[ADDR_3:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR_3]], align 4 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp eq i32 [[X_3]], [[END]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[DONE]] +; +entry: + %len = zext i32 %len.short to i64 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, 1 + %rc = icmp slt i64 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_03(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BACKEDGE_3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVAR]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], [[LEN]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp uge i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 false, label [[PREHEADER_FALLBACK]], label [[BACKEDGE:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_3]], [[BACKEDGE_3]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i64 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add i64 [[IV_FALLBACK]], 1 +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp sge i64 [[IV_FALLBACK]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[FAILURE:%.*]], label [[BACKEDGE_FALLBACK]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[ADDR_FALLBACK:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_FALLBACK]] +; CHECK-NEXT: [[X_FALLBACK:%.*]] = load i32, i32* [[ADDR_FALLBACK]], align 4 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp eq i32 [[X_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[DONE]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i64 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp uge i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 false, label [[PREHEADER_FALLBACK]], label [[BACKEDGE_1]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[DONE]] +; CHECK: loop.2: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_2]] = add nuw nsw i64 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[RC_2:%.*]] = icmp uge i64 [[IV_NEXT_1]], [[LEN]] +; CHECK-NEXT: br i1 false, label [[PREHEADER_FALLBACK]], label [[BACKEDGE_2]] +; CHECK: backedge.2: +; CHECK-NEXT: [[ADDR_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR_2]], align 4 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp eq i32 [[X_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[DONE]] +; CHECK: loop.3: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[RC_3:%.*]] = icmp uge i64 [[IV_NEXT_2]], [[LEN]] +; CHECK-NEXT: br i1 false, label [[PREHEADER_FALLBACK]], label [[BACKEDGE_3]] +; CHECK: backedge.3: +; CHECK-NEXT: [[ADDR_3:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR_3]], align 4 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp eq i32 [[X_3]], [[END]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[DONE]] +; +entry: + %len = zext i32 %len.short to i64 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, 1 + %rc = icmp sge i64 %iv, %len + br i1 %rc, label %failure, label %backedge + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_04(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN_SHORT:%.*]], 0 +; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[FAILURE:%.*]], label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT]] to i64 +; CHECK-NEXT: [[LEN_MUNUS_1:%.*]] = add i64 [[LEN]], -1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BACKEDGE_3:%.*]] ], [ 0, [[PREHEADER]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[LEN_MUNUS_1]], [[PREHEADER]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], [[LEN_MUNUS_1]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], -1 +; CHECK-NEXT: [[RC:%.*]] = icmp ne i64 [[IV]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_3]], [[BACKEDGE_3]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure.loopexit: +; CHECK-NEXT: br label [[FAILURE]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i64 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add i64 [[IV_FALLBACK]], -1 +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp ne i64 [[IV_FALLBACK]], 0 +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[BACKEDGE_FALLBACK]], label [[FAILURE_LOOPEXIT:%.*]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[ADDR_FALLBACK:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_FALLBACK]] +; CHECK-NEXT: [[X_FALLBACK:%.*]] = load i32, i32* [[ADDR_FALLBACK]], align 4 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp eq i32 [[X_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[DONE]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i64 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_NEXT]], -1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ne i64 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[DONE]] +; CHECK: loop.2: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_2]] = add i64 [[IV_NEXT_1]], -1 +; CHECK-NEXT: [[RC_2:%.*]] = icmp ne i64 [[IV_NEXT_1]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.2: +; CHECK-NEXT: [[ADDR_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR_2]], align 4 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp eq i32 [[X_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[DONE]] +; CHECK: loop.3: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_3]] = add i64 [[IV_NEXT_2]], -1 +; CHECK-NEXT: [[RC_3:%.*]] = icmp ne i64 [[IV_NEXT_2]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.3: +; CHECK-NEXT: [[ADDR_3:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR_3]], align 4 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp eq i32 [[X_3]], [[END]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[DONE]] +; +entry: + %zero.check = icmp eq i32 %len.short, 0 + br i1 %zero.check, label %failure, label %preheader + +preheader: + %len = zext i32 %len.short to i64 + %len.munus.1 = add i64 %len, -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %len.munus.1, %preheader ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, -1 + %rc = icmp ne i64 %iv, 0 + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_05(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN_SHORT:%.*]], 0 +; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[FAILURE:%.*]], label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT]] to i64 +; CHECK-NEXT: [[LEN_MUNUS_1:%.*]] = add i64 [[LEN]], -1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BACKEDGE_3:%.*]] ], [ 0, [[PREHEADER]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[LEN_MUNUS_1]], [[PREHEADER]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], [[LEN_MUNUS_1]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], -1 +; CHECK-NEXT: [[RC:%.*]] = icmp ne i64 [[IV]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_3]], [[BACKEDGE_3]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure.loopexit: +; CHECK-NEXT: br label [[FAILURE]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i64 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add i64 [[IV_FALLBACK]], -1 +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp ne i64 [[IV_FALLBACK]], 0 +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[BACKEDGE_FALLBACK]], label [[FAILURE_LOOPEXIT:%.*]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[ADDR_FALLBACK:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_FALLBACK]] +; CHECK-NEXT: [[X_FALLBACK:%.*]] = load i32, i32* [[ADDR_FALLBACK]], align 4 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp eq i32 [[X_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[DONE]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i64 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_NEXT]], -1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ne i64 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[DONE]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV_NEXT_2]] = add i64 [[IV_NEXT_1]], -1 +; CHECK-NEXT: [[RC_2:%.*]] = icmp ne i64 [[IV_NEXT_1]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.2: +; CHECK-NEXT: [[ADDR_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR_2]], align 4 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp eq i32 [[X_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[DONE]] +; CHECK: loop.3: +; CHECK-NEXT: [[IV_NEXT_3]] = add i64 [[IV_NEXT_2]], -1 +; CHECK-NEXT: [[RC_3:%.*]] = icmp ne i64 [[IV_NEXT_2]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.3: +; CHECK-NEXT: [[ADDR_3:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR_3]], align 4 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp eq i32 [[X_3]], [[END]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[DONE]] +; +entry: + %zero.check = icmp eq i32 %len.short, 0 + br i1 %zero.check, label %failure, label %preheader + +preheader: + %len = zext i32 %len.short to i64 + %len.munus.1 = add i64 %len, -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %len.munus.1, %preheader ], [ %iv.next, %backedge ] + %iv.next = add i64 %iv, -1 + %rc = icmp ne i64 %iv, 0 + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_06(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_06( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: br label [[OUTER:%.*]] +; CHECK: outer: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_BACKEDGE:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BACKEDGE_3:%.*]] ], [ 0, [[OUTER]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[OUTER]] ], [ [[IV_NEXT_3:%.*]], [[BACKEDGE_3]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], [[LEN]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_CHECKED:%.*]], label [[PREHEADER_FALLBACK:%.*]] +; CHECK: loop.checked: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ult i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[OUTER_BACKEDGE_LOOPEXIT3:%.*]] +; CHECK: outer_backedge.loopexit: +; CHECK-NEXT: [[IV_NEXT_LCSSA2_PH:%.*]] = phi i64 [ [[IV_NEXT_FALLBACK:%.*]], [[BACKEDGE_FALLBACK:%.*]] ] +; CHECK-NEXT: br label [[OUTER_BACKEDGE]] +; CHECK: outer_backedge.loopexit3: +; CHECK-NEXT: [[IV_NEXT_LCSSA2_PH4:%.*]] = phi i64 [ [[IV_NEXT_3]], [[BACKEDGE_3]] ], [ [[IV_NEXT_2:%.*]], [[BACKEDGE_2:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ], [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: br label [[OUTER_BACKEDGE]] +; CHECK: outer_backedge: +; CHECK-NEXT: [[IV_NEXT_LCSSA2:%.*]] = phi i64 [ [[IV_NEXT_LCSSA2_PH]], [[OUTER_BACKEDGE_LOOPEXIT:%.*]] ], [ [[IV_NEXT_LCSSA2_PH4]], [[OUTER_BACKEDGE_LOOPEXIT3]] ] +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i64 [[OUTER_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[OUTER_COND]], label [[OUTER]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT_LCSSA2]], [[OUTER_BACKEDGE]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.fallback: +; CHECK-NEXT: [[IV_FALLBACK:%.*]] = phi i64 [ [[IV_LCSSA:%.*]], [[PREHEADER_FALLBACK]] ], [ [[IV_NEXT_FALLBACK]], [[BACKEDGE_FALLBACK]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_FALLBACK]] = add i64 [[IV_FALLBACK]], 1 +; CHECK-NEXT: [[RC_FALLBACK:%.*]] = icmp slt i64 [[IV_FALLBACK]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_FALLBACK]], label [[BACKEDGE_FALLBACK]], label [[FAILURE:%.*]] +; CHECK: backedge.fallback: +; CHECK-NEXT: [[ADDR_FALLBACK:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_FALLBACK]] +; CHECK-NEXT: [[X_FALLBACK:%.*]] = load i32, i32* [[ADDR_FALLBACK]], align 4 +; CHECK-NEXT: [[END_COND_FALLBACK:%.*]] = icmp eq i32 [[X_FALLBACK]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_FALLBACK]], label [[LOOP_FALLBACK:%.*]], label [[OUTER_BACKEDGE_LOOPEXIT]] +; CHECK: preheader.fallback: +; CHECK-NEXT: [[IV_LCSSA]] = phi i64 [ [[IV]], [[LOOP_CHECKED]] ], [ [[IV_NEXT]], [[LOOP_1]] ], [ [[IV_NEXT_1]], [[LOOP_2:%.*]] ], [ [[IV_NEXT_2]], [[LOOP_3:%.*]] ], [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: br label [[LOOP_FALLBACK]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ult i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP_2]], label [[OUTER_BACKEDGE_LOOPEXIT3]] +; CHECK: loop.2: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_2]] = add nuw nsw i64 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[RC_2:%.*]] = icmp ult i64 [[IV_NEXT_1]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.2: +; CHECK-NEXT: [[ADDR_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR_2]], align 4 +; CHECK-NEXT: [[END_COND_2:%.*]] = icmp eq i32 [[X_2]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_2]], label [[LOOP_3]], label [[OUTER_BACKEDGE_LOOPEXIT3]] +; CHECK: loop.3: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[RC_3:%.*]] = icmp ult i64 [[IV_NEXT_2]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3]], label [[PREHEADER_FALLBACK]] +; CHECK: backedge.3: +; CHECK-NEXT: [[ADDR_3:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR_3]], align 4 +; CHECK-NEXT: [[END_COND_3:%.*]] = icmp eq i32 [[X_3]], [[END]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[END_COND_3]], label [[LOOP]], label [[OUTER_BACKEDGE_LOOPEXIT3]] +; +entry: + %len = zext i32 %len.short to i64 + br label %outer + +outer: + %outer.iv = phi i64 [0, %entry], [%outer.iv.next, %outer_backedge] + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %outer ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, 1 + %rc = icmp slt i64 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %outer_backedge + +outer_backedge: + %outer.iv.next = add i64 %outer.iv, 1 + %outer.cond = icmp slt i64 %outer.iv.next, 1000 + br i1 %outer.cond, label %outer, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %outer_backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +}