Index: llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp (revision 313980) +++ llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp (revision 313981) @@ -1,330 +1,512 @@ //===-- LoopPredication.cpp - Guard based loop predication pass -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // The LoopPredication pass tries to convert loop variant range checks to loop // invariant by widening checks across loop iterations. For example, it will // convert // // for (i = 0; i < n; i++) { // guard(i < len); // ... // } // // to // // for (i = 0; i < n; i++) { // guard(n - 1 < len); // ... // } // // After this transformation the condition of the guard is loop invariant, so // loop-unswitch can later unswitch the loop by this condition which basically // predicates the loop by the widened condition: // // if (n - 1 < len) // for (i = 0; i < n; i++) { // ... // } // else // deoptimize // +// It's tempting to rely on SCEV here, but it has proven to be problematic. +// Generally the facts SCEV provides about the increment step of add +// recurrences are true if the backedge of the loop is taken, which implicitly +// assumes that the guard doesn't fail. Using these facts to optimize the +// guard results in a circular logic where the guard is optimized under the +// assumption that it never fails. +// +// For example, in the loop below the induction variable will be marked as nuw +// basing on the guard. Basing on nuw the guard predicate will be considered +// monotonic. Given a monotonic condition it's tempting to replace the induction +// variable in the condition with its value on the last iteration. But this +// transformation is not correct, e.g. e = 4, b = 5 breaks the loop. +// +// for (int i = b; i != e; i++) +// guard(i u< len) +// +// One of the ways to reason about this problem is to use an inductive proof +// approach. Given the loop: +// +// if (B(Start)) { +// do { +// I = PHI(Start, I.INC) +// I.INC = I + Step +// guard(G(I)); +// } while (B(I.INC)); +// } +// +// where B(x) and G(x) are predicates that map integers to booleans, we want a +// loop invariant expression M such the following program has the same semantics +// as the above: +// +// if (B(Start)) { +// do { +// I = PHI(Start, I.INC) +// I.INC = I + Step +// guard(G(Start) && M); +// } while (B(I.INC)); +// } +// +// One solution for M is M = forall X . (G(X) && B(X + Step)) => G(X + Step) +// +// Informal proof that the transformation above is correct: +// +// By the definition of guards we can rewrite the guard condition to: +// G(I) && G(Start) && M +// +// Let's prove that for each iteration of the loop: +// G(Start) && M => G(I) +// And the condition above can be simplified to G(Start) && M. +// +// Induction base. +// G(Start) && M => G(Start) +// +// Induction step. Assuming G(Start) && M => G(I) on the subsequent +// iteration: +// +// B(I + Step) is true because it's the backedge condition. +// G(I) is true because the backedge is guarded by this condition. +// +// So M = forall X . (G(X) && B(X + Step)) => G(X + Step) implies +// G(I + Step). +// +// Note that we can use anything stronger than M, i.e. any condition which +// implies M. +// +// For now the transformation is limited to the following case: +// * The loop has a single latch with either ult or slt icmp condition. +// * The step of the IV used in the latch condition is 1. +// * The IV of the latch condition is the same as the post increment IV of the +// guard condition. +// * The guard condition is ult. +// +// In this case the latch is of the from: +// ++i u< latchLimit or ++i s< latchLimit +// and the guard is of the form: +// i u< guardLimit +// +// For the unsigned latch comparison case M is: +// forall X . X u< guardLimit && (X + 1) u< latchLimit => +// (X + 1) u< guardLimit +// +// This is true if latchLimit u<= guardLimit since then +// (X + 1) u< latchLimit u<= guardLimit == (X + 1) u< guardLimit. +// +// So the widened condition is: +// i.start u< guardLimit && latchLimit u<= guardLimit +// +// For the signed latch comparison case M is: +// forall X . X u< guardLimit && (X + 1) s< latchLimit => +// (X + 1) u< guardLimit +// +// The only way the antecedent can be true and the consequent can be false is +// if +// X == guardLimit - 1 +// (and guardLimit is non-zero, but we won't use this latter fact). +// If X == guardLimit - 1 then the second half of the antecedent is +// guardLimit s< latchLimit +// and its negation is +// latchLimit s<= guardLimit. +// +// In other words, if latchLimit s<= guardLimit then: +// (the ranges below are written in ConstantRange notation, where [A, B) is the +// set for (I = A; I != B; I++ /*maywrap*/) yield(I);) +// +// forall X . X u< guardLimit && (X + 1) s< latchLimit => (X + 1) u< guardLimit +// == forall X . X u< guardLimit && (X + 1) s< guardLimit => (X + 1) u< guardLimit +// == forall X . X in [0, guardLimit) && (X + 1) in [INT_MIN, guardLimit) => (X + 1) in [0, guardLimit) +// == forall X . X in [0, guardLimit) && X in [INT_MAX, guardLimit-1) => X in [-1, guardLimit-1) +// == forall X . X in [0, guardLimit-1) => X in [-1, guardLimit-1) +// == true +// +// So the widened condition is: +// i.start u< guardLimit && latchLimit s<= guardLimit +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopUtils.h" #define DEBUG_TYPE "loop-predication" using namespace llvm; namespace { class LoopPredication { /// Represents an induction variable check: /// icmp Pred, , struct LoopICmp { ICmpInst::Predicate Pred; const SCEVAddRecExpr *IV; const SCEV *Limit; LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, const SCEV *Limit) : Pred(Pred), IV(IV), Limit(Limit) {} LoopICmp() {} }; ScalarEvolution *SE; Loop *L; const DataLayout *DL; BasicBlock *Preheader; + LoopICmp LatchCheck; - Optional parseLoopICmp(ICmpInst *ICI); + Optional parseLoopICmp(ICmpInst *ICI) { + return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0), + ICI->getOperand(1)); + } + Optional parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS, + Value *RHS); + + Optional parseLoopLatchICmp(); Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Instruction *InsertAt); Optional widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, IRBuilder<> &Builder); bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); public: LoopPredication(ScalarEvolution *SE) : SE(SE){}; bool runOnLoop(Loop *L); }; class LoopPredicationLegacyPass : public LoopPass { public: static char ID; LoopPredicationLegacyPass() : LoopPass(ID) { initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { getLoopAnalysisUsage(AU); } bool runOnLoop(Loop *L, LPPassManager &LPM) override { if (skipLoop(L)) return false; auto *SE = &getAnalysis().getSE(); LoopPredication LP(SE); return LP.runOnLoop(L); } }; char LoopPredicationLegacyPass::ID = 0; } // end namespace llvm INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication", "Loop predication", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication", "Loop predication", false, false) Pass *llvm::createLoopPredicationPass() { return new LoopPredicationLegacyPass(); } PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { LoopPredication LP(&AR.SE); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); } Optional -LoopPredication::parseLoopICmp(ICmpInst *ICI) { - ICmpInst::Predicate Pred = ICI->getPredicate(); - - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); +LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS, + Value *RHS) { const SCEV *LHSS = SE->getSCEV(LHS); if (isa(LHSS)) return None; const SCEV *RHSS = SE->getSCEV(RHS); if (isa(RHSS)) return None; // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV if (SE->isLoopInvariant(LHSS, L)) { std::swap(LHS, RHS); std::swap(LHSS, RHSS); Pred = ICmpInst::getSwappedPredicate(Pred); } const SCEVAddRecExpr *AR = dyn_cast(LHSS); if (!AR || AR->getLoop() != L) return None; return LoopICmp(Pred, AR, RHSS); } Value *LoopPredication::expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Instruction *InsertAt) { + // TODO: we can check isLoopEntryGuardedByCond before emitting the check + Type *Ty = LHS->getType(); assert(Ty == RHS->getType() && "expandCheck operands have different types?"); Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt); Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt); return Builder.CreateICmp(Pred, LHSV, RHSV); } /// If ICI can be widened to a loop invariant condition emits the loop /// invariant condition in the loop preheader and return it, otherwise /// returns None. Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, IRBuilder<> &Builder) { DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); DEBUG(ICI->dump()); + // parseLoopStructure guarantees that the latch condition is: + // ++i u< latchLimit or ++i s< latchLimit + // We are looking for the range checks of the form: + // i u< guardLimit auto RangeCheck = parseLoopICmp(ICI); if (!RangeCheck) { DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); return None; } - - ICmpInst::Predicate Pred = RangeCheck->Pred; - const SCEVAddRecExpr *IndexAR = RangeCheck->IV; - const SCEV *RHSS = RangeCheck->Limit; - - auto CanExpand = [this](const SCEV *S) { - return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); - }; - if (!CanExpand(RHSS)) + if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { + DEBUG(dbgs() << "Unsupported range check predicate(" << RangeCheck->Pred + << ")!\n"); return None; - - DEBUG(dbgs() << "IndexAR: "); - DEBUG(IndexAR->dump()); - - bool IsIncreasing = false; - if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing)) - return None; - - // If the predicate is increasing the condition can change from false to true - // as the loop progresses, in this case take the value on the first iteration - // for the widened check. Otherwise the condition can change from true to - // false as the loop progresses, so take the value on the last iteration. - const SCEV *NewLHSS = IsIncreasing - ? IndexAR->getStart() - : SE->getSCEVAtScope(IndexAR, L->getParentLoop()); - if (NewLHSS == IndexAR) { - DEBUG(dbgs() << "Can't compute NewLHSS!\n"); + } + auto *RangeCheckIV = RangeCheck->IV; + auto *PostIncRangeCheckIV = RangeCheckIV->getPostIncExpr(*SE); + if (LatchCheck.IV != PostIncRangeCheckIV) { + DEBUG(dbgs() << "Post increment range check IV (" << *PostIncRangeCheckIV + << ") is not the same as latch IV (" << *LatchCheck.IV + << ")!\n"); return None; } + assert(RangeCheckIV->getStepRecurrence(*SE)->isOne() && "must be one"); + const SCEV *Start = RangeCheckIV->getStart(); - DEBUG(dbgs() << "NewLHSS: "); - DEBUG(NewLHSS->dump()); + // Generate the widened condition. See the file header comment for reasoning. + // If the latch condition is unsigned: + // i.start u< guardLimit && latchLimit u<= guardLimit + // If the latch condition is signed: + // i.start u< guardLimit && latchLimit s<= guardLimit + + auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred) + ? ICmpInst::ICMP_SLE + : ICmpInst::ICMP_ULE; - if (!CanExpand(NewLHSS)) + auto CanExpand = [this](const SCEV *S) { + return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); + }; + if (!CanExpand(Start) || !CanExpand(LatchCheck.Limit) || + !CanExpand(RangeCheck->Limit)) return None; - DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n"); - Instruction *InsertAt = Preheader->getTerminator(); - return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt); + auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck->Pred, + Start, RangeCheck->Limit, InsertAt); + auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, + LatchCheck.Limit, RangeCheck->Limit, InsertAt); + return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, SCEVExpander &Expander) { DEBUG(dbgs() << "Processing guard:\n"); DEBUG(Guard->dump()); IRBuilder<> Builder(cast(Preheader->getTerminator())); // The guard condition is expected to be in form of: // cond1 && cond2 && cond3 ... // Iterate over subconditions looking for for icmp conditions which can be // widened across loop iterations. Widening these conditions remember the // resulting list of subconditions in Checks vector. SmallVector Worklist(1, Guard->getOperand(0)); SmallPtrSet Visited; SmallVector Checks; unsigned NumWidened = 0; do { Value *Condition = Worklist.pop_back_val(); if (!Visited.insert(Condition).second) continue; Value *LHS, *RHS; using namespace llvm::PatternMatch; if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) { Worklist.push_back(LHS); Worklist.push_back(RHS); continue; } if (ICmpInst *ICI = dyn_cast(Condition)) { if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) { Checks.push_back(NewRangeCheck.getValue()); NumWidened++; continue; } } // Save the condition as is if we can't widen it Checks.push_back(Condition); } while (Worklist.size() != 0); if (NumWidened == 0) return false; // Emit the new guard condition Builder.SetInsertPoint(Guard); Value *LastCheck = nullptr; for (auto *Check : Checks) if (!LastCheck) LastCheck = Check; else LastCheck = Builder.CreateAnd(LastCheck, Check); Guard->setOperand(0, LastCheck); DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n"); return true; } +Optional LoopPredication::parseLoopLatchICmp() { + using namespace PatternMatch; + + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + DEBUG(dbgs() << "The loop doesn't have a single latch!\n"); + return None; + } + + ICmpInst::Predicate Pred; + Value *LHS, *RHS; + BasicBlock *TrueDest, *FalseDest; + + if (!match(LoopLatch->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest, + FalseDest))) { + DEBUG(dbgs() << "Failed to match the latch terminator!\n"); + return None; + } + assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) && + "One of the latch's destinations must be the header"); + if (TrueDest != L->getHeader()) + Pred = ICmpInst::getInversePredicate(Pred); + + auto Result = parseLoopICmp(Pred, LHS, RHS); + if (!Result) { + DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); + return None; + } + + if (Result->Pred != ICmpInst::ICMP_ULT && + Result->Pred != ICmpInst::ICMP_SLT) { + DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred + << ")!\n"); + return None; + } + + // Check affine first, so if it's not we don't try to compute the step + // recurrence. + if (!Result->IV->isAffine()) { + DEBUG(dbgs() << "The induction variable is not affine!\n"); + return None; + } + + auto *Step = Result->IV->getStepRecurrence(*SE); + if (!Step->isOne()) { + DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); + return None; + } + + return Result; +} + bool LoopPredication::runOnLoop(Loop *Loop) { L = Loop; DEBUG(dbgs() << "Analyzing "); DEBUG(L->dump()); Module *M = L->getHeader()->getModule(); // There is nothing to do if the module doesn't use guards auto *GuardDecl = M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard)); if (!GuardDecl || GuardDecl->use_empty()) return false; DL = &M->getDataLayout(); Preheader = L->getLoopPreheader(); if (!Preheader) return false; + auto LatchCheckOpt = parseLoopLatchICmp(); + if (!LatchCheckOpt) + return false; + LatchCheck = *LatchCheckOpt; + // Collect all the guards into a vector and process later, so as not // to invalidate the instruction iterator. SmallVector Guards; for (const auto BB : L->blocks()) for (auto &I : *BB) if (auto *II = dyn_cast(&I)) if (II->getIntrinsicID() == Intrinsic::experimental_guard) Guards.push_back(II); if (Guards.empty()) return false; SCEVExpander Expander(*SE, *DL, "loop-predication"); bool Changed = false; for (auto *Guard : Guards) Changed |= widenGuardConditions(Guard, Expander); return Changed; } Index: llvm/trunk/test/Transforms/LoopPredication/basic.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/basic.ll (revision 313980) +++ llvm/trunk/test/Transforms/LoopPredication/basic.ll (revision 313981) @@ -1,571 +1,653 @@ ; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s ; RUN: opt -S -passes='require,loop(loop-predication)' < %s 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) define i32 @unsigned_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @unsigned_loop_0_to_n_ult_check entry: %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } define i32 @unsigned_loop_0_to_n_ugt_check(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @unsigned_loop_0_to_n_ugt_check entry: %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] %within.bounds = icmp ugt i32 %length, %i call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } - -define i32 @two_range_checks(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, i32 %n) { -; CHECK-LABEL: @two_range_checks +define i32 @signed_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_ult_check entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - %within.bounds.1 = icmp ult i32 %i, %length.1 - %within.bounds.2 = icmp ult i32 %i, %length.2 - %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.1, %array.2.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } -define i32 @three_range_checks(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, - i32* %array.3, i32 %length.3, i32 %n) { -; CHECK-LABEL: @three_range_checks +define i32 @unsupported_latch_pred_loop_0_to_n(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @unsupported_latch_pred_loop_0_to_n entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} -; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] -; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: %within.bounds = icmp ult i32 %i, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - %within.bounds.1 = icmp ult i32 %i, %length.1 - %within.bounds.2 = icmp ult i32 %i, %length.2 - %within.bounds.3 = icmp ult i32 %i, %length.3 - %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 - %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.2 = add i32 %loop.acc.1, %array.2.i - - %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 - %array.3.i = load i32, i32* %array.3.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i - %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %i.next = add nsw i32 %i, 1 + %continue = icmp ne i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } -define i32 @three_guards(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, - i32* %array.3, i32 %length.3, i32 %n) { -; CHECK-LABEL: @three_guards +define i32 @signed_loop_0_to_n_unsupported_iv_step(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_unsupported_iv_step entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1 -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2 -; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3 ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ] - +; CHECK: %within.bounds = icmp ult i32 %i, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - - %within.bounds.1 = icmp ult i32 %i, %length.1 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ] + %within.bounds = icmp ult i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %within.bounds.2 = icmp ult i32 %i, %length.2 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ] - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.2 = add i32 %loop.acc.1, %array.2.i - - %within.bounds.3 = icmp ult i32 %i, %length.3 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ] - - %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 - %array.3.i = load i32, i32* %array.3.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i - %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %i.next = add nsw i32 %i, 2 + %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } -define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check +define i32 @signed_loop_0_to_n_equal_iv_range_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_equal_iv_range_check entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0 +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds = icmp sge i32 %i, 0 + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %loop ], [ 0, %loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i + %j.next = add nsw i32 %j, 1 %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } -define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check +define i32 @signed_loop_0_to_n_unrelated_iv_range_check(i32* %array, i32 %start, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_unrelated_iv_range_check entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1 -; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] -; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: %within.bounds = icmp ult i32 %j, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds = icmp slt i32 %i, %length + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %loop ], [ %start, %loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i + %j.next = add nsw i32 %j, 1 %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } -define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_both_checks +define i32 @two_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, i32 %n) { +; CHECK-LABEL: @two_range_checks entry: - %tmp5 = icmp sle i32 %n, 0 + %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0 -; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1 -; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] -; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 -; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds.1 = icmp slt i32 %i, %length - %within.bounds.2 = icmp sge i32 %i, 0 + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 %within.bounds = and i1 %within.bounds.1, %within.bounds.2 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 - %array.i = load i32, i32* %array.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc, %array.i + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i - %i.next = add nsw i32 %i, 1 - %continue = icmp slt i32 %i.next, %n + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.2.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_range_checks +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] +; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds.3 = icmp ult i32 %i, %length.3 + %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_guards(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_guards +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] +; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ] + + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + + %within.bounds.1 = icmp ult i32 %i, %length.1 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %within.bounds.2 = icmp ult i32 %i, %length.2 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ] + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %within.bounds.3 = icmp ult i32 %i, %length.3 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ] + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } define i32 @unsigned_loop_0_to_n_unrelated_condition(i32* %array, i32 %length, i32 %n, i32 %x) { ; CHECK-LABEL: @unsigned_loop_0_to_n_unrelated_condition entry: %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: %unrelated.cond = icmp ult i32 %x, %length ; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]] ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] %within.bounds = icmp ult i32 %i, %length %unrelated.cond = icmp ult i32 %x, %length %guard.cond = and i1 %within.bounds, %unrelated.cond call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } ; Don't change the guard condition if there were no widened subconditions define i32 @test_no_widened_conditions(i32* %array, i32 %length, i32 %n, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: @test_no_widened_conditions entry: %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: %unrelated.cond.1 = icmp eq i32 %x1, %i ; CHECK-NEXT: %unrelated.cond.2 = icmp eq i32 %x2, %i ; CHECK-NEXT: %unrelated.cond.3 = icmp eq i32 %x3, %i ; CHECK-NEXT: %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2 ; CHECK-NEXT: %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] %unrelated.cond.1 = icmp eq i32 %x1, %i %unrelated.cond.2 = icmp eq i32 %x2, %i %unrelated.cond.3 = icmp eq i32 %x3, %i %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2 %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3 call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } define i32 @signed_loop_start_to_n_loop_variant_bound(i32* %array, i32 %x, i32 %start, i32 %n) { ; CHECK-LABEL: @signed_loop_start_to_n_loop_variant_bound entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: %bound = add i32 %i, %x -; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound +; CHECK-NEXT: %within.bounds = icmp ult i32 %i, %bound ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] %bound = add i32 %i, %x - %within.bounds = icmp slt i32 %i, %bound + %within.bounds = icmp ult i32 %i, %bound call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } define i32 @signed_loop_start_to_n_non_monotonic_predicate(i32* %array, i32 %x, i32 %start, i32 %n) { ; CHECK-LABEL: @signed_loop_start_to_n_non_monotonic_predicate entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: %guard.cond = icmp eq i32 %i, %x ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] %guard.cond = icmp eq i32 %i, %x call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } define i32 @unsigned_loop_0_to_n_hoist_length(i32* %array, i16 %length.i16, i32 %n) { ; CHECK-LABEL: @unsigned_loop_0_to_n_hoist_length entry: %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[length:[^ ]+]] = zext i16 %length.i16 to i32 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], [[length]] +; CHECK: [[length:[^ ]+]] = zext i16 %length.i16 to i32 +; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, [[length]] +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, [[length]] +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] %length = zext i16 %length.i16 to i32 %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } define i32 @unsigned_loop_0_to_n_cant_hoist_length(i32* %array, i32 %length, i32 %divider, i32 %n) { ; CHECK-LABEL: @unsigned_loop_0_to_n_cant_hoist_length entry: %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK-NEXT: %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] ; CHECK-NEXT: %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] ; CHECK-NEXT: %length.udiv = udiv i32 %length, %divider ; CHECK-NEXT: %within.bounds = icmp ult i32 %i, %length.udiv ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] %length.udiv = udiv i32 %length, %divider %within.bounds = icmp ult i32 %i, %length.udiv call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } Index: llvm/trunk/test/Transforms/LoopPredication/nested.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/nested.ll (revision 313980) +++ llvm/trunk/test/Transforms/LoopPredication/nested.ll (revision 313981) @@ -1,160 +1,217 @@ ; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s ; RUN: opt -S -passes='require,loop(loop-predication)' < %s 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) define i32 @signed_loop_0_to_n_nested_0_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { ; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_inner_index_check entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %outer.loop.preheader outer.loop.preheader: -; CHECK: outer.loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1 br label %outer.loop outer.loop: %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %tmp6 = icmp sle i32 %l, 0 br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader inner.loop.preheader: ; CHECK: inner.loop.preheader: -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %l, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %inner.loop br label %inner.loop inner.loop: ; CHECK: inner.loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %j, %length + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %j.i64 = zext i32 %j to i64 %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64 %array.j = load i32, i32* %array.j.ptr, align 4 %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j %j.next = add nsw i32 %j, 1 %inner.continue = icmp slt i32 %j.next, %l br i1 %inner.continue, label %inner.loop, label %outer.loop.inc outer.loop.inc: %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] %i.next = add nsw i32 %i, 1 %outer.continue = icmp slt i32 %i.next, %n br i1 %outer.continue, label %outer.loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] ret i32 %result } define i32 @signed_loop_0_to_n_nested_0_to_l_outer_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { ; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_outer_index_check entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %outer.loop.preheader outer.loop.preheader: ; CHECK: outer.loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %outer.loop br label %outer.loop outer.loop: %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %tmp6 = icmp sle i32 %l, 0 br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader inner.loop.preheader: br label %inner.loop inner.loop: ; CHECK: inner.loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %i, %length + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %inner.loop.acc.next = add i32 %inner.loop.acc, %array.i %j.next = add nsw i32 %j, 1 %inner.continue = icmp slt i32 %j.next, %l br i1 %inner.continue, label %inner.loop, label %outer.loop.inc outer.loop.inc: %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] %i.next = add nsw i32 %i, 1 %outer.continue = icmp slt i32 %i.next, %n br i1 %outer.continue, label %outer.loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] ret i32 %result } define i32 @signed_loop_0_to_n_nested_i_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { ; CHECK-LABEL: @signed_loop_0_to_n_nested_i_to_l_inner_index_check entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %outer.loop.preheader outer.loop.preheader: +; CHECK: outer.loop.preheader: +; CHECK-NEXT: [[first_iteration_check_outer:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check_outer:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond_outer:[^ ]+]] = and i1 [[first_iteration_check_outer]], [[limit_check_outer]] +; CHECK-NEXT: br label %outer.loop br label %outer.loop outer.loop: ; CHECK: outer.loop: -; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1 -; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]] -; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1 %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %tmp6 = icmp sle i32 %l, 0 br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader inner.loop.preheader: ; CHECK: inner.loop.preheader: -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length +; CHECK: [[limit_check_inner:[^ ]+]] = icmp sle i32 %l, %length +; CHECK: br label %inner.loop br label %inner.loop inner.loop: ; CHECK: inner.loop: +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[limit_check_inner]], [[wide_cond_outer]] ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %j, %length + %within.bounds = icmp ult i32 %j, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %j.i64 = zext i32 %j to i64 + %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64 + %array.j = load i32, i32* %array.j.ptr, align 4 + %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j + + %j.next = add nsw i32 %j, 1 + %inner.continue = icmp slt i32 %j.next, %l + br i1 %inner.continue, label %inner.loop, label %outer.loop.inc + +outer.loop.inc: + %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] + %i.next = add nsw i32 %i, 1 + %outer.continue = icmp slt i32 %i.next, %n + br i1 %outer.continue, label %outer.loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] + ret i32 %result +} + +define i32 @cant_expand_guard_check_start(i32* %array, i32 %length, i32 %n, i32 %l, i32 %maybezero) { +; CHECK-LABEL: @cant_expand_guard_check_start +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %outer.loop.preheader + +outer.loop.preheader: + br label %outer.loop + +outer.loop: + %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %tmp6 = icmp sle i32 %l, 0 + %div = udiv i32 %i, %maybezero + br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader + +inner.loop.preheader: +; CHECK: inner.loop.preheader: +; CHECK: br label %inner.loop + br label %inner.loop + +inner.loop: +; CHECK: inner.loop: +; CHECK: %within.bounds = icmp ult i32 %j, %length +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] + %j = phi i32 [ %j.next, %inner.loop ], [ %div, %inner.loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %j.i64 = zext i32 %j to i64 %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64 %array.j = load i32, i32* %array.j.ptr, align 4 %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j %j.next = add nsw i32 %j, 1 %inner.continue = icmp slt i32 %j.next, %l br i1 %inner.continue, label %inner.loop, label %outer.loop.inc outer.loop.inc: %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] %i.next = add nsw i32 %i, 1 %outer.continue = icmp slt i32 %i.next, %n br i1 %outer.continue, label %outer.loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] ret i32 %result } \ No newline at end of file Index: llvm/trunk/test/Transforms/LoopPredication/visited.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/visited.ll (revision 313980) +++ llvm/trunk/test/Transforms/LoopPredication/visited.ll (revision 313981) @@ -1,140 +1,141 @@ ; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s ; RUN: opt -S -passes='require,loop(loop-predication)' < %s 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) define i32 @test_visited(i32* %array, i32 %length, i32 %n, i32 %x) { ; CHECK-LABEL: @test_visited entry: %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: ; CHECK: %unrelated.cond = icmp eq i32 %x, %i ; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]] ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] %within.bounds = icmp ult i32 %i, %length %unrelated.cond = icmp eq i32 %x, %i %guard.cond.2 = and i1 %within.bounds, %unrelated.cond %guard.cond.3 = and i1 %guard.cond.2, %unrelated.cond %guard.cond.4 = and i1 %guard.cond.3, %guard.cond.2 %guard.cond.5 = and i1 %guard.cond.4, %guard.cond.3 %guard.cond.6 = and i1 %guard.cond.5, %guard.cond.4 %guard.cond.7 = and i1 %guard.cond.6, %guard.cond.5 %guard.cond.8 = and i1 %guard.cond.7, %guard.cond.6 %guard.cond.9 = and i1 %guard.cond.8, %guard.cond.7 %guard.cond.10 = and i1 %guard.cond.9, %guard.cond.8 %guard.cond.11 = and i1 %guard.cond.10, %guard.cond.9 %guard.cond.12 = and i1 %guard.cond.11, %guard.cond.10 %guard.cond.13 = and i1 %guard.cond.12, %guard.cond.11 %guard.cond.14 = and i1 %guard.cond.13, %guard.cond.12 %guard.cond.15 = and i1 %guard.cond.14, %guard.cond.13 %guard.cond.16 = and i1 %guard.cond.15, %guard.cond.14 %guard.cond.17 = and i1 %guard.cond.16, %guard.cond.15 %guard.cond.18 = and i1 %guard.cond.17, %guard.cond.16 %guard.cond.19 = and i1 %guard.cond.18, %guard.cond.17 %guard.cond.20 = and i1 %guard.cond.19, %guard.cond.18 %guard.cond.21 = and i1 %guard.cond.20, %guard.cond.19 %guard.cond.22 = and i1 %guard.cond.21, %guard.cond.20 %guard.cond.23 = and i1 %guard.cond.22, %guard.cond.21 %guard.cond.24 = and i1 %guard.cond.23, %guard.cond.22 %guard.cond.25 = and i1 %guard.cond.24, %guard.cond.23 %guard.cond.26 = and i1 %guard.cond.25, %guard.cond.24 %guard.cond.27 = and i1 %guard.cond.26, %guard.cond.25 %guard.cond.28 = and i1 %guard.cond.27, %guard.cond.26 %guard.cond.29 = and i1 %guard.cond.28, %guard.cond.27 %guard.cond.30 = and i1 %guard.cond.29, %guard.cond.28 %guard.cond.31 = and i1 %guard.cond.30, %guard.cond.29 %guard.cond.32 = and i1 %guard.cond.31, %guard.cond.30 %guard.cond.33 = and i1 %guard.cond.32, %guard.cond.31 %guard.cond.34 = and i1 %guard.cond.33, %guard.cond.32 %guard.cond.35 = and i1 %guard.cond.34, %guard.cond.33 %guard.cond.36 = and i1 %guard.cond.35, %guard.cond.34 %guard.cond.37 = and i1 %guard.cond.36, %guard.cond.35 %guard.cond.38 = and i1 %guard.cond.37, %guard.cond.36 %guard.cond.39 = and i1 %guard.cond.38, %guard.cond.37 %guard.cond.40 = and i1 %guard.cond.39, %guard.cond.38 %guard.cond.41 = and i1 %guard.cond.40, %guard.cond.39 %guard.cond.42 = and i1 %guard.cond.41, %guard.cond.40 %guard.cond.43 = and i1 %guard.cond.42, %guard.cond.41 %guard.cond.44 = and i1 %guard.cond.43, %guard.cond.42 %guard.cond.45 = and i1 %guard.cond.44, %guard.cond.43 %guard.cond.46 = and i1 %guard.cond.45, %guard.cond.44 %guard.cond.47 = and i1 %guard.cond.46, %guard.cond.45 %guard.cond.48 = and i1 %guard.cond.47, %guard.cond.46 %guard.cond.49 = and i1 %guard.cond.48, %guard.cond.47 %guard.cond.50 = and i1 %guard.cond.49, %guard.cond.48 %guard.cond.51 = and i1 %guard.cond.50, %guard.cond.49 %guard.cond.52 = and i1 %guard.cond.51, %guard.cond.50 %guard.cond.53 = and i1 %guard.cond.52, %guard.cond.51 %guard.cond.54 = and i1 %guard.cond.53, %guard.cond.52 %guard.cond.55 = and i1 %guard.cond.54, %guard.cond.53 %guard.cond.56 = and i1 %guard.cond.55, %guard.cond.54 %guard.cond.57 = and i1 %guard.cond.56, %guard.cond.55 %guard.cond.58 = and i1 %guard.cond.57, %guard.cond.56 %guard.cond.59 = and i1 %guard.cond.58, %guard.cond.57 %guard.cond.60 = and i1 %guard.cond.59, %guard.cond.58 %guard.cond.61 = and i1 %guard.cond.60, %guard.cond.59 %guard.cond.62 = and i1 %guard.cond.61, %guard.cond.60 %guard.cond.63 = and i1 %guard.cond.62, %guard.cond.61 %guard.cond.64 = and i1 %guard.cond.63, %guard.cond.62 %guard.cond.65 = and i1 %guard.cond.64, %guard.cond.63 %guard.cond.66 = and i1 %guard.cond.65, %guard.cond.64 %guard.cond.67 = and i1 %guard.cond.66, %guard.cond.65 %guard.cond.68 = and i1 %guard.cond.67, %guard.cond.66 %guard.cond.69 = and i1 %guard.cond.68, %guard.cond.67 %guard.cond.70 = and i1 %guard.cond.69, %guard.cond.68 %guard.cond.71 = and i1 %guard.cond.70, %guard.cond.69 %guard.cond.72 = and i1 %guard.cond.71, %guard.cond.70 %guard.cond.73 = and i1 %guard.cond.72, %guard.cond.71 %guard.cond.74 = and i1 %guard.cond.73, %guard.cond.72 %guard.cond.75 = and i1 %guard.cond.74, %guard.cond.73 %guard.cond.76 = and i1 %guard.cond.75, %guard.cond.74 %guard.cond.77 = and i1 %guard.cond.76, %guard.cond.75 %guard.cond.78 = and i1 %guard.cond.77, %guard.cond.76 %guard.cond.79 = and i1 %guard.cond.78, %guard.cond.77 %guard.cond.80 = and i1 %guard.cond.79, %guard.cond.78 %guard.cond.81 = and i1 %guard.cond.80, %guard.cond.79 %guard.cond.82 = and i1 %guard.cond.81, %guard.cond.80 %guard.cond.83 = and i1 %guard.cond.82, %guard.cond.81 %guard.cond.84 = and i1 %guard.cond.83, %guard.cond.82 %guard.cond.85 = and i1 %guard.cond.84, %guard.cond.83 %guard.cond.86 = and i1 %guard.cond.85, %guard.cond.84 %guard.cond.87 = and i1 %guard.cond.86, %guard.cond.85 %guard.cond.88 = and i1 %guard.cond.87, %guard.cond.86 %guard.cond.89 = and i1 %guard.cond.88, %guard.cond.87 %guard.cond.90 = and i1 %guard.cond.89, %guard.cond.88 %guard.cond.91 = and i1 %guard.cond.90, %guard.cond.89 %guard.cond.92 = and i1 %guard.cond.91, %guard.cond.90 %guard.cond.93 = and i1 %guard.cond.92, %guard.cond.91 %guard.cond.94 = and i1 %guard.cond.93, %guard.cond.92 %guard.cond.95 = and i1 %guard.cond.94, %guard.cond.93 %guard.cond.96 = and i1 %guard.cond.95, %guard.cond.94 %guard.cond.97 = and i1 %guard.cond.96, %guard.cond.95 %guard.cond.98 = and i1 %guard.cond.97, %guard.cond.96 %guard.cond.99 = and i1 %guard.cond.98, %guard.cond.97 call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond.99, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } \ No newline at end of file