Index: llvm/include/llvm/Transforms/Scalar/InductiveUnrolling.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/InductiveUnrolling.h @@ -0,0 +1,32 @@ +//===- InductiveUnrolling.h - Inductive Unrolling ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the Inductive Unrolling pass. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H +#define LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Loop; +class LPMUpdater; + +class InductiveUnrollingPass : public PassInfoMixin { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_INDUCTIVEUNROLLING_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -154,6 +154,7 @@ #include "llvm/Transforms/Scalar/IVUsersPrinter.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InductiveUnrolling.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/JumpThreading.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -399,6 +399,7 @@ LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) +LOOP_PASS("inductive-unroll", InductiveUnrollingPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -19,6 +19,7 @@ GVNSink.cpp IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp + InductiveUnrolling.cpp IndVarSimplify.cpp InferAddressSpaces.cpp InstSimplifyPass.cpp Index: llvm/lib/Transforms/Scalar/InductiveUnrolling.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/InductiveUnrolling.cpp @@ -0,0 +1,374 @@ +//===- InductiveUnrolling.cpp - Inductive Unrolling -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This transformation removes loop checks with known exit count via unrolling. +// The idea of this transform is following: +// +// Let EC be symbolic exit count of a given loop (which means that if this +// branch is executed on iteration number ExitCount, it must exit): +// +// do { +// ... +// if (cond()) break; // <- Must exit on iteration number EC. +// ... +// } while (true); +// +// Let us unroll this loop with the factor of UF: +// +// do { +// ... +// if (cond()) break; // Corresponds to iteration N * UF + 0. +// ... +// if (cond()) break; // Corresponds to iteration N * UF + 1. +// ... +// if (cond()) break; // Corresponds to iteration N * UF + 2. +// +// ... +// +// if (cond()) break; // Corresponds to iteration N * UF + UF - 1. +// ... +// } while(true); +// +// Let Rem = EC % UF. Note that if Rem = 2, then only the check N * UF + 2 may +// possibly exit, and no other clone of this check can. This allows us to +// execute it only once per UF iterations. +// +// Unfortunately, in general case, we do not know the value of Rem. But we can +// still compute it in runtime and insert a preloop which will enter the +// unrolled loop on the Rem'th iteration of the original loop. Therefore we can +// only execute the first cloned check, and no other clone may possibly exit. +// The resulting solution will be: +// +// for (int i = 0; i < Rem; i++) { +// ... +// if (cond()) goto exit; +// ... +// do { +// ... +// if (cond()) break; // Corresponds to iteration N * UF + Rem. +// ... +// if (false) break; // Corresponds to iteration N * UF + Rem + 1. +// ... +// if (false) break; // Corresponds to iteration N * UF + Rem + 2. +// +// ... +// +// if (false) break; // Corresponds to iteration N * UF + Rem + UF - 1. +// ... +// } while(true); +// +// exit: +// ... +// +//===----------------------------------------------------------------------===// + + +#include "llvm/Transforms/Scalar/InductiveUnrolling.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" + +using namespace llvm; + +#define DEBUG_TYPE "inductive-unroll" + +static cl::opt MaxUnrolledSize("inductive-unroll-max-unrolled-size", + cl::Hidden, cl::init(150)); + +struct InductiveCheck { + InductiveCheck(BranchInst *Br, const SCEV *ExitCount) + : Br(Br), ExitCount(ExitCount) {} + BranchInst *Br; + const SCEV *ExitCount; +}; + +static bool skipLoop(const Loop &L, ScalarEvolution &SE) { + if (!L.isInnermost()) + return true; + if (!L.getLoopLatch() || !L.getLoopPreheader()) + return true; + return false; +} + +static bool collectCandidates(SmallVectorImpl &Checks, Loop &L, + ScalarEvolution &SE, DominatorTree &DT, + unsigned UnrollFactor) { + assert(Checks.empty() && "Supposed to be an empty vector!"); + if (!L.isInnermost()) + return false; + BasicBlock *Latch = L.getLoopLatch(); + assert(Latch && "Should have a latch!"); + + for (BasicBlock *BB : L.getBlocks()) { + if (!DT.dominates(BB, Latch)) + continue; + using namespace PatternMatch; + if (!match(BB->getTerminator(), + m_Br(m_Value(), m_BasicBlock(), m_BasicBlock()))) + continue; + auto *ExitCount = SE.getExitCount(&L, BB); + if (isa(ExitCount)) + continue; + auto *Br = cast(BB->getTerminator()); + Checks.push_back(InductiveCheck(Br, ExitCount)); + } + return !Checks.empty(); +} + +static void createPreloop(const InductiveCheck &Check, Loop &L, + const unsigned UnrollFactor, LoopInfo &LI, + ScalarEvolution &SE, DominatorTree &DT, + AssumptionCache &AC, const TargetTransformInfo &TTI) { + // TODO: We should be able to use CloneLoopBlocks here instead, but currently + // it does not handle multi-exit blocks correctly. + SmallVector NewBlocks; + DenseMap BlockMap; + ValueToValueMapTy VMap; + + LoopBlocksRPO RPOT(&L); + RPOT.perform(&LI); + Loop *NewL = LI.AllocateLoop(); + Loop *ParentL = L.getParentLoop(); + if (ParentL) + ParentL->addChildLoop(NewL); + else + LI.addTopLevelLoop(NewL); + for (auto *BB : RPOT) { + auto *NewBB = CloneBasicBlock(BB, VMap, ".preloop", BB->getParent()); + BlockMap[BB] = NewBB; + NewBlocks.push_back(NewBB); + NewL->addBasicBlockToLoop(NewBB, LI); + } + + // Add new inputs to exit blocks. + for (auto &It : BlockMap) { + BasicBlock *OldLB = It.first; + BasicBlock *NewLB = It.second; + for (auto *Succ : successors(OldLB)) + if (!L.contains(Succ)) + for (auto &PN : Succ->phis()) { + Value *Inc = PN.getIncomingValueForBlock(OldLB); + if (VMap.count(Inc)) + Inc = VMap[Inc]; + PN.addIncoming(Inc, NewLB); + } + } + + // Fixup in-loop blocks. + for (auto *BB : NewBlocks) { + auto *TI = BB->getTerminator(); + for (size_t I = 0; I < TI->getNumSuccessors(); I++) { + auto *Succ = TI->getSuccessor(I); + if (L.contains(Succ)) { + // For loop blocks, bind the successors. + TI->setSuccessor(I, BlockMap[Succ]); + } + } + + for (auto &Insn : *BB) + for (size_t I = 0; I < Insn.getNumOperands(); I++) + if (VMap.count(Insn.getOperand(I))) + Insn.setOperand(I, VMap[Insn.getOperand(I)]); + + for (auto &PN : BB->phis()) + for (size_t I = 0; I < PN.getNumIncomingValues(); I++) + if (L.contains(PN.getIncomingBlock(I))) + PN.setIncomingBlock(I, BlockMap[PN.getIncomingBlock(I)]); + } + + // Channel the preheader. + auto *OrigPreheader = L.getLoopPreheader(); + auto &Context = OrigPreheader->getContext(); + Function *F = OrigPreheader->getParent(); + + // Channel original preheader into the preloop. + auto *OrigPredTerm = cast(OrigPreheader->getTerminator()); + assert(OrigPredTerm->isUnconditional() && "Conditional branch in preheader?"); + auto *OrigHeader = L.getHeader(); + SCEVExpander Exp(SE, OrigHeader->getModule()->getDataLayout(), + "preloop.check"); + Type *ExitTy = Check.ExitCount->getType(); + const SCEV *UnrollFactorSCEV = SE.getConstant(ExitTy, UnrollFactor); + const SCEV *RemSCEV = SE.getURemExpr(Check.ExitCount, UnrollFactorSCEV); + + // Expand required values. + auto *Rem = Exp.expandCodeFor(RemSCEV, nullptr, OrigPredTerm); + + BasicBlock *NewHeader = BlockMap[OrigHeader]; + // Channel original preheader into the preloop. + { + IRBuilder<> Builder(OrigPredTerm); + Builder.CreateBr(NewHeader); + OrigPredTerm->eraseFromParent(); + } + + BasicBlock *NewLatch = BlockMap[L.getLoopLatch()]; + // Add exit to new preheader. + BasicBlock *NewPreheader = + BasicBlock::Create(Context, OrigPreheader->getName() + ".split", F); + if (ParentL) + ParentL->addBasicBlockToLoop(NewPreheader, LI); + + // Add a canonical IV into the preloop. + { + IRBuilder<> Builder(NewHeader->getFirstNonPHI()); + auto *CanonicalIV = Builder.CreatePHI(ExitTy, 2, "canonical.iv"); + CanonicalIV->addIncoming(ConstantInt::get(ExitTy, 0), OrigPreheader); + Builder.SetInsertPoint(NewLatch->getTerminator()); + auto *Increment = + Builder.CreateAdd(CanonicalIV, ConstantInt::get(ExitTy, 1), + CanonicalIV->getName() + ".inc"); + CanonicalIV->addIncoming(Increment, NewLatch); + auto *Split = NewHeader->splitBasicBlock(NewHeader->getFirstNonPHI(), + NewHeader->getName() + ".split"); + NewL->addBasicBlockToLoop(Split, LI); + auto *Term = NewHeader->getTerminator(); + Builder.SetInsertPoint(Term); + auto *RemainderCheck = + Builder.CreateICmpULT(CanonicalIV, Rem, "remainder.check"); + Builder.CreateCondBr(RemainderCheck, Split, NewPreheader); + Term->eraseFromParent(); + } + + // Bind Phis properly. + { + IRBuilder<> Builder(NewPreheader); + // Form the new preheader. + for (auto &PN : OrigHeader->phis()) { + Value *Incoming = VMap[&PN]; + auto *LCSSA = + Builder.CreatePHI(PN.getType(), 1, Incoming->getName() + ".lcssa"); + LCSSA->addIncoming(Incoming, NewHeader); + for (size_t I = 0; I < PN.getNumIncomingValues(); I++) + if (PN.getIncomingBlock(I) == OrigPreheader) { + PN.setIncomingBlock(I, NewPreheader); + PN.setIncomingValue(I, LCSSA); + break; + } + } + Builder.CreateBr(OrigHeader); + } + + // Sync up the DT. + SmallVector Updates; + Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); + Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); + Updates.push_back({DominatorTree::Insert, NewPreheader, OrigHeader}); + for (auto *BB : NewL->blocks()) { + SmallPtrSet UniqueSucc; + for (auto *Succ : successors(BB)) + if (UniqueSucc.insert(Succ).second) + Updates.push_back({DominatorTree::Insert, BB, Succ}); + } + DT.applyUpdates(Updates); + + // Split critical edge for the check. + auto *Exit = L.contains(Check.Br->getSuccessor(0)) + ? Check.Br->getSuccessor(1) + : Check.Br->getSuccessor(0); + if (!Exit->getUniquePredecessor()) + SplitEdge(Check.Br->getParent(), Exit, &DT, &LI); + +#ifndef NDEBUG + // Make sure that the entire function is fine after the manipulations. + assert(DT.verify() && "Bad dom tree!"); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + LI.verify(DT); +#endif +} + +static void rewriteChecks(InductiveCheck &Check, Loop &L) { + auto &Context = L.getHeader()->getContext(); + auto *Exit = L.contains(Check.Br->getSuccessor(0)) + ? Check.Br->getSuccessor(1) + : Check.Br->getSuccessor(0); + Value *Cond = + ConstantInt::getBool(Context, Check.Br->getSuccessor(1) == Exit); + for (auto *Pred : predecessors(Exit)) { + auto *Br = cast(Pred->getTerminator()); + assert(L.contains(Br) && "Out of loop predecessor?"); + assert(Br->isConditional() && "Impossible for a loop block"); + if (Br != Check.Br) + Br->setCondition(Cond); + } +} + +static bool removeChecks(Loop &L, LoopInfo &LI, ScalarEvolution &SE, + DominatorTree &DT, AssumptionCache &AC, + const TargetTransformInfo &TTI) { + if (skipLoop(L, SE)) + return false; + + unsigned NumCalls = 0; + bool NotDuplicatable = false, Convergent = false; + SmallPtrSet EphValues; + const unsigned LoopSize = ApproximateLoopSize( + &L, NumCalls, NotDuplicatable, Convergent, TTI, EphValues, /*BEInsns*/ 2); + if (LoopSize == 0 || NotDuplicatable || Convergent) + return false; + + const unsigned UnrollFactor = MaxUnrolledSize / LoopSize; + if (UnrollFactor < 2) + return false; + + SmallVector Checks; + if (!collectCandidates(Checks, L, SE, DT, UnrollFactor)) + return false; + + // FIXME: So far, handle only 1 check. + if (Checks.size() != 1) + return false; + + auto &InductiveCheck = Checks[0]; + createPreloop(InductiveCheck, L, UnrollFactor, LI, SE, DT, AC, TTI); + + UnrollLoopOptions ULO; + ULO.Count = UnrollFactor; + ULO.TripCount = 0; + ULO.Force = true; + ULO.AllowRuntime = false; + ULO.AllowExpensiveTripCount = true; + ULO.PreserveCondBr = true; + ULO.PreserveOnlyFirst = false; + ULO.TripMultiple = 1; + ULO.PeelCount = 0; + ULO.UnrollRemainder = false; + ULO.ForgetAllSCEV = true; + + UnrollLoop(&L, ULO, &LI, &SE, &DT, &AC, &TTI, + /*OptimizationRemarkEmitter*/ nullptr, /*PreserveLCSSA*/ true); + + rewriteChecks(InductiveCheck, L); + +#ifndef NDEBUG + // Make sure that the entire function is fine after all transforms. + assert(DT.verify() && "Bad dom tree!"); + assert(L.isLCSSAForm(DT) && "LCSSA form broken"); + LI.verify(DT); +#endif + + return true; +} + +PreservedAnalyses InductiveUnrollingPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &LU) { + if (removeChecks(L, AR.LI, AR.SE, AR.DT, AR.AC, AR.TTI)) + return getLoopPassPreservedAnalyses(); + return PreservedAnalyses::all(); +} Index: llvm/test/Transforms/InductiveUnroll/motivation.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InductiveUnroll/motivation.ll @@ -0,0 +1,527 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=inductive-unroll -inductive-unroll-max-unrolled-size=20 %s | FileCheck %s + +target datalayout = "n8:16:32:64" + +define i64 @test_01(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[LEN_SHORT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i64 +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_PRELOOP_LCSSA:%.*]], [[ENTRY_SPLIT:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ult i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE:%.*]], label [[LOOP_FAILURE_CRIT_EDGE:%.*]] +; CHECK: loop.failure_crit_edge: +; CHECK-NEXT: br label [[FAILURE:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_PRELOOP:%.*]], [[BACKEDGE_PRELOOP:%.*]] ], [ [[IV_NEXT_1]], [[BACKEDGE_1]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.preloop: +; CHECK-NEXT: [[IV_PRELOOP:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_PRELOOP]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[CANONICAL_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[CANONICAL_IV_INC:%.*]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[REMAINDER_CHECK:%.*]] = icmp ult i64 [[CANONICAL_IV]], [[TMP1]] +; CHECK-NEXT: br i1 [[REMAINDER_CHECK]], label [[LOOP_PRELOOP_SPLIT:%.*]], label [[ENTRY_SPLIT]] +; CHECK: loop.preloop.split: +; CHECK-NEXT: [[IV_NEXT_PRELOOP]] = add i64 [[IV_PRELOOP]], 1 +; CHECK-NEXT: [[RC_PRELOOP:%.*]] = icmp slt i64 [[IV_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_PRELOOP]], label [[BACKEDGE_PRELOOP]], label [[FAILURE]] +; CHECK: backedge.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_PRELOOP]] +; CHECK-NEXT: [[X_PRELOOP:%.*]] = load i32, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[END_COND_PRELOOP:%.*]] = icmp eq i32 [[X_PRELOOP]], [[END]] +; CHECK-NEXT: [[CANONICAL_IV_INC]] = add i64 [[CANONICAL_IV]], 1 +; CHECK-NEXT: br i1 [[END_COND_PRELOOP]], label [[LOOP_PRELOOP]], label [[DONE]] +; CHECK: entry.split: +; CHECK-NEXT: [[IV_PRELOOP_LCSSA]] = phi i64 [ [[IV_PRELOOP]], [[LOOP_PRELOOP]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ult i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[LOOP_FAILURE_CRIT_EDGE]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP]], label [[DONE]] +; +entry: + %len = zext i32 %len.short to i64 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i64 %iv, 1 + %rc = icmp slt i64 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +declare void @side_effect() + +define i64 @test_02(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[LEN_SHORT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i64 +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_PRELOOP_LCSSA:%.*]], [[ENTRY_SPLIT:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ult i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE:%.*]], label [[LOOP_FAILURE_CRIT_EDGE:%.*]] +; CHECK: loop.failure_crit_edge: +; CHECK-NEXT: br label [[FAILURE:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_PRELOOP:%.*]], [[BACKEDGE_PRELOOP:%.*]] ], [ [[IV_NEXT_1]], [[BACKEDGE_1]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.preloop: +; CHECK-NEXT: [[IV_PRELOOP:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_PRELOOP]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[CANONICAL_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[CANONICAL_IV_INC:%.*]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[REMAINDER_CHECK:%.*]] = icmp ult i64 [[CANONICAL_IV]], [[TMP1]] +; CHECK-NEXT: br i1 [[REMAINDER_CHECK]], label [[LOOP_PRELOOP_SPLIT:%.*]], label [[ENTRY_SPLIT]] +; CHECK: loop.preloop.split: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_PRELOOP]] = add i64 [[IV_PRELOOP]], 1 +; CHECK-NEXT: [[RC_PRELOOP:%.*]] = icmp slt i64 [[IV_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_PRELOOP]], label [[BACKEDGE_PRELOOP]], label [[FAILURE]] +; CHECK: backedge.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_PRELOOP]] +; CHECK-NEXT: [[X_PRELOOP:%.*]] = load i32, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[END_COND_PRELOOP:%.*]] = icmp eq i32 [[X_PRELOOP]], [[END]] +; CHECK-NEXT: [[CANONICAL_IV_INC]] = add i64 [[CANONICAL_IV]], 1 +; CHECK-NEXT: br i1 [[END_COND_PRELOOP]], label [[LOOP_PRELOOP]], label [[DONE]] +; CHECK: entry.split: +; CHECK-NEXT: [[IV_PRELOOP_LCSSA]] = phi i64 [ [[IV_PRELOOP]], [[LOOP_PRELOOP]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ult i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[LOOP_FAILURE_CRIT_EDGE]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP]], label [[DONE]] +; +entry: + %len = zext i32 %len.short to i64 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, 1 + %rc = icmp slt i64 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_03(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[LEN_SHORT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i64 +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_PRELOOP_LCSSA:%.*]], [[ENTRY_SPLIT:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp uge i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[LOOP_FAILURE_CRIT_EDGE:%.*]], label [[BACKEDGE:%.*]] +; CHECK: loop.failure_crit_edge: +; CHECK-NEXT: br label [[FAILURE:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_PRELOOP:%.*]], [[BACKEDGE_PRELOOP:%.*]] ], [ [[IV_NEXT_1]], [[BACKEDGE_1]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.preloop: +; CHECK-NEXT: [[IV_PRELOOP:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_PRELOOP]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[CANONICAL_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[CANONICAL_IV_INC:%.*]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[REMAINDER_CHECK:%.*]] = icmp ult i64 [[CANONICAL_IV]], [[TMP1]] +; CHECK-NEXT: br i1 [[REMAINDER_CHECK]], label [[LOOP_PRELOOP_SPLIT:%.*]], label [[ENTRY_SPLIT]] +; CHECK: loop.preloop.split: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_PRELOOP]] = add i64 [[IV_PRELOOP]], 1 +; CHECK-NEXT: [[RC_PRELOOP:%.*]] = icmp sge i64 [[IV_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_PRELOOP]], label [[FAILURE]], label [[BACKEDGE_PRELOOP]] +; CHECK: backedge.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_PRELOOP]] +; CHECK-NEXT: [[X_PRELOOP:%.*]] = load i32, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[END_COND_PRELOOP:%.*]] = icmp eq i32 [[X_PRELOOP]], [[END]] +; CHECK-NEXT: [[CANONICAL_IV_INC]] = add i64 [[CANONICAL_IV]], 1 +; CHECK-NEXT: br i1 [[END_COND_PRELOOP]], label [[LOOP_PRELOOP]], label [[DONE]] +; CHECK: entry.split: +; CHECK-NEXT: [[IV_PRELOOP_LCSSA]] = phi i64 [ [[IV_PRELOOP]], [[LOOP_PRELOOP]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp uge i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 false, label [[LOOP_FAILURE_CRIT_EDGE]], label [[BACKEDGE_1]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP]], label [[DONE]] +; +entry: + %len = zext i32 %len.short to i64 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, 1 + %rc = icmp sge i64 %iv, %len + br i1 %rc, label %failure, label %backedge + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_04(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN_SHORT:%.*]], 0 +; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[FAILURE:%.*]], label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT]] to i64 +; CHECK-NEXT: [[LEN_MUNUS_1:%.*]] = add i64 [[LEN]], -1 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[LEN_SHORT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = add i1 [[TMP0]], true +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64 +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_PRELOOP_LCSSA:%.*]], [[PREHEADER_SPLIT:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], -1 +; CHECK-NEXT: [[RC:%.*]] = icmp ne i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE:%.*]], label [[LOOP_FAILURE_LOOPEXIT_CRIT_EDGE:%.*]] +; CHECK: loop.failure.loopexit_crit_edge: +; CHECK-NEXT: br label [[FAILURE_LOOPEXIT:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_PRELOOP:%.*]], [[BACKEDGE_PRELOOP:%.*]] ], [ [[IV_NEXT_1]], [[BACKEDGE_1]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure.loopexit: +; CHECK-NEXT: br label [[FAILURE]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.preloop: +; CHECK-NEXT: [[IV_PRELOOP:%.*]] = phi i64 [ [[LEN_MUNUS_1]], [[PREHEADER]] ], [ [[IV_NEXT_PRELOOP]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[CANONICAL_IV:%.*]] = phi i64 [ 0, [[PREHEADER]] ], [ [[CANONICAL_IV_INC:%.*]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[REMAINDER_CHECK:%.*]] = icmp ult i64 [[CANONICAL_IV]], [[TMP2]] +; CHECK-NEXT: br i1 [[REMAINDER_CHECK]], label [[LOOP_PRELOOP_SPLIT:%.*]], label [[PREHEADER_SPLIT]] +; CHECK: loop.preloop.split: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_PRELOOP]] = add i64 [[IV_PRELOOP]], -1 +; CHECK-NEXT: [[RC_PRELOOP:%.*]] = icmp ne i64 [[IV_PRELOOP]], 0 +; CHECK-NEXT: br i1 [[RC_PRELOOP]], label [[BACKEDGE_PRELOOP]], label [[FAILURE_LOOPEXIT]] +; CHECK: backedge.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_PRELOOP]] +; CHECK-NEXT: [[X_PRELOOP:%.*]] = load i32, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[END_COND_PRELOOP:%.*]] = icmp eq i32 [[X_PRELOOP]], [[END]] +; CHECK-NEXT: [[CANONICAL_IV_INC]] = add i64 [[CANONICAL_IV]], 1 +; CHECK-NEXT: br i1 [[END_COND_PRELOOP]], label [[LOOP_PRELOOP]], label [[DONE]] +; CHECK: preheader.split: +; CHECK-NEXT: [[IV_PRELOOP_LCSSA]] = phi i64 [ [[IV_PRELOOP]], [[LOOP_PRELOOP]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_NEXT]], -1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ne i64 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[LOOP_FAILURE_LOOPEXIT_CRIT_EDGE]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP]], label [[DONE]] +; +entry: + %zero.check = icmp eq i32 %len.short, 0 + br i1 %zero.check, label %failure, label %preheader + +preheader: + %len = zext i32 %len.short to i64 + %len.munus.1 = add i64 %len, -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %len.munus.1, %preheader ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, -1 + %rc = icmp ne i64 %iv, 0 + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_05(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN_SHORT:%.*]], 0 +; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[FAILURE:%.*]], label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT]] to i64 +; CHECK-NEXT: [[LEN_MUNUS_1:%.*]] = add i64 [[LEN]], -1 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[LEN_SHORT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = add i1 [[TMP0]], true +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64 +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_PRELOOP_LCSSA:%.*]], [[PREHEADER_SPLIT:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], -1 +; CHECK-NEXT: [[RC:%.*]] = icmp ne i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE:%.*]], label [[LOOP_FAILURE_LOOPEXIT_CRIT_EDGE:%.*]] +; CHECK: loop.failure.loopexit_crit_edge: +; CHECK-NEXT: br label [[FAILURE_LOOPEXIT:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], [[BACKEDGE]] ], [ [[IV_NEXT_PRELOOP:%.*]], [[BACKEDGE_PRELOOP:%.*]] ], [ [[IV_NEXT_1]], [[BACKEDGE_1]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure.loopexit: +; CHECK-NEXT: br label [[FAILURE]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.preloop: +; CHECK-NEXT: [[IV_PRELOOP:%.*]] = phi i64 [ [[LEN_MUNUS_1]], [[PREHEADER]] ], [ [[IV_NEXT_PRELOOP]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[CANONICAL_IV:%.*]] = phi i64 [ 0, [[PREHEADER]] ], [ [[CANONICAL_IV_INC:%.*]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[REMAINDER_CHECK:%.*]] = icmp ult i64 [[CANONICAL_IV]], [[TMP2]] +; CHECK-NEXT: br i1 [[REMAINDER_CHECK]], label [[LOOP_PRELOOP_SPLIT:%.*]], label [[PREHEADER_SPLIT]] +; CHECK: loop.preloop.split: +; CHECK-NEXT: [[IV_NEXT_PRELOOP]] = add i64 [[IV_PRELOOP]], -1 +; CHECK-NEXT: [[RC_PRELOOP:%.*]] = icmp ne i64 [[IV_PRELOOP]], 0 +; CHECK-NEXT: br i1 [[RC_PRELOOP]], label [[BACKEDGE_PRELOOP]], label [[FAILURE_LOOPEXIT]] +; CHECK: backedge.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_PRELOOP]] +; CHECK-NEXT: [[X_PRELOOP:%.*]] = load i32, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[END_COND_PRELOOP:%.*]] = icmp eq i32 [[X_PRELOOP]], [[END]] +; CHECK-NEXT: [[CANONICAL_IV_INC]] = add i64 [[CANONICAL_IV]], 1 +; CHECK-NEXT: br i1 [[END_COND_PRELOOP]], label [[LOOP_PRELOOP]], label [[DONE]] +; CHECK: preheader.split: +; CHECK-NEXT: [[IV_PRELOOP_LCSSA]] = phi i64 [ [[IV_PRELOOP]], [[LOOP_PRELOOP]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_NEXT]], -1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ne i64 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[LOOP_FAILURE_LOOPEXIT_CRIT_EDGE]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP]], label [[DONE]] +; +entry: + %zero.check = icmp eq i32 %len.short, 0 + br i1 %zero.check, label %failure, label %preheader + +preheader: + %len = zext i32 %len.short to i64 + %len.munus.1 = add i64 %len, -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %len.munus.1, %preheader ], [ %iv.next, %backedge ] + %iv.next = add i64 %iv, -1 + %rc = icmp ne i64 %iv, 0 + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +} + +define i64 @test_06(i32* %p, i32 %end, i32 %len.short) { +; CHECK-LABEL: @test_06( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = zext i32 [[LEN_SHORT:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[LEN_SHORT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i64 +; CHECK-NEXT: br label [[OUTER:%.*]] +; CHECK: outer: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_BACKEDGE:%.*]] ] +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_PRELOOP_LCSSA:%.*]], [[OUTER_SPLIT:%.*]] ], [ [[IV_NEXT_1:%.*]], [[BACKEDGE_1:%.*]] ] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ult i64 [[IV]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE:%.*]], label [[LOOP_FAILURE_CRIT_EDGE:%.*]] +; CHECK: loop.failure_crit_edge: +; CHECK-NEXT: br label [[FAILURE:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[END_COND:%.*]] = icmp eq i32 [[X]], [[END:%.*]] +; CHECK-NEXT: br i1 [[END_COND]], label [[LOOP_1:%.*]], label [[OUTER_BACKEDGE_LOOPEXIT3:%.*]] +; CHECK: outer_backedge.loopexit: +; CHECK-NEXT: [[IV_NEXT_LCSSA2_PH:%.*]] = phi i64 [ [[IV_NEXT_PRELOOP:%.*]], [[BACKEDGE_PRELOOP:%.*]] ] +; CHECK-NEXT: br label [[OUTER_BACKEDGE]] +; CHECK: outer_backedge.loopexit3: +; CHECK-NEXT: [[IV_NEXT_LCSSA2_PH4:%.*]] = phi i64 [ [[IV_NEXT_1]], [[BACKEDGE_1]] ], [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: br label [[OUTER_BACKEDGE]] +; CHECK: outer_backedge: +; CHECK-NEXT: [[IV_NEXT_LCSSA2:%.*]] = phi i64 [ [[IV_NEXT_LCSSA2_PH]], [[OUTER_BACKEDGE_LOOPEXIT:%.*]] ], [ [[IV_NEXT_LCSSA2_PH4]], [[OUTER_BACKEDGE_LOOPEXIT3]] ] +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i64 [[OUTER_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[OUTER_COND]], label [[OUTER]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT_LCSSA2]], [[OUTER_BACKEDGE]] ] +; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK: failure.loopexit: +; CHECK-NEXT: br label [[FAILURE]] +; CHECK: failure: +; CHECK-NEXT: ret i64 -1 +; CHECK: loop.preloop: +; CHECK-NEXT: [[IV_PRELOOP:%.*]] = phi i64 [ 0, [[OUTER]] ], [ [[IV_NEXT_PRELOOP]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[CANONICAL_IV:%.*]] = phi i64 [ 0, [[OUTER]] ], [ [[CANONICAL_IV_INC:%.*]], [[BACKEDGE_PRELOOP]] ] +; CHECK-NEXT: [[REMAINDER_CHECK:%.*]] = icmp ult i64 [[CANONICAL_IV]], [[TMP1]] +; CHECK-NEXT: br i1 [[REMAINDER_CHECK]], label [[LOOP_PRELOOP_SPLIT:%.*]], label [[OUTER_SPLIT]] +; CHECK: loop.preloop.split: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_PRELOOP]] = add i64 [[IV_PRELOOP]], 1 +; CHECK-NEXT: [[RC_PRELOOP:%.*]] = icmp slt i64 [[IV_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[RC_PRELOOP]], label [[BACKEDGE_PRELOOP]], label [[FAILURE_LOOPEXIT:%.*]] +; CHECK: backedge.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_PRELOOP]] +; CHECK-NEXT: [[X_PRELOOP:%.*]] = load i32, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[END_COND_PRELOOP:%.*]] = icmp eq i32 [[X_PRELOOP]], [[END]] +; CHECK-NEXT: [[CANONICAL_IV_INC]] = add i64 [[CANONICAL_IV]], 1 +; CHECK-NEXT: br i1 [[END_COND_PRELOOP]], label [[LOOP_PRELOOP]], label [[OUTER_BACKEDGE_LOOPEXIT]] +; CHECK: outer.split: +; CHECK-NEXT: [[IV_PRELOOP_LCSSA]] = phi i64 [ [[IV_PRELOOP]], [[LOOP_PRELOOP]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[RC_1:%.*]] = icmp ult i64 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1]], label [[LOOP_FAILURE_CRIT_EDGE]] +; CHECK: backedge.1: +; CHECK-NEXT: [[ADDR_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR_1]], align 4 +; CHECK-NEXT: [[END_COND_1:%.*]] = icmp eq i32 [[X_1]], [[END]] +; CHECK-NEXT: br i1 [[END_COND_1]], label [[LOOP]], label [[OUTER_BACKEDGE_LOOPEXIT3]] +; +entry: + %len = zext i32 %len.short to i64 + br label %outer + +outer: + %outer.iv = phi i64 [0, %entry], [%outer.iv.next, %outer_backedge] + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ 0, %outer ], [ %iv.next, %backedge ] + call void @side_effect() + %iv.next = add i64 %iv, 1 + %rc = icmp slt i64 %iv, %len + br i1 %rc, label %backedge, label %failure + +backedge: ; preds = %loop + %addr = getelementptr i32, i32* %p, i64 %iv + %x = load i32, i32* %addr + %end.cond = icmp eq i32 %x, %end + br i1 %end.cond, label %loop, label %outer_backedge + +outer_backedge: + %outer.iv.next = add i64 %outer.iv, 1 + %outer.cond = icmp slt i64 %outer.iv.next, 1000 + br i1 %outer.cond, label %outer, label %done + +done: ; preds = %backedge + %iv.next.lcssa = phi i64 [ %iv.next, %outer_backedge ] + ret i64 %iv.next.lcssa + +failure: ; preds = %loop + ret i64 -1 +}