Index: llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp @@ -34,6 +34,120 @@ // else // deoptimize // +// It's tempting to rely on SCEV here, but it has proven to be problematic. +// Generally the facts SCEV provides about the increment step of add +// recurrences are true if the backedge of the loop is taken, which implicitly +// assumes that the guard doesn't fail. Using these facts to optimize the +// guard results in a circular logic where the guard is optimized under the +// assumption that it never fails. +// +// For example, in the loop below the induction variable will be marked as nuw +// basing on the guard. Basing on nuw the guard predicate will be considered +// monotonic. Given a monotonic condition it's tempting to replace the induction +// variable in the condition with its value on the last iteration. But this +// transformation is not correct, e.g. e = 4, b = 5 breaks the loop. +// +// for (int i = b; i != e; i++) +// guard(i u< len) +// +// One of the ways to reason about this problem is to use an inductive proof +// approach. Given the loop: +// +// if (B(Start)) { +// do { +// I = PHI(Start, I.INC) +// I.INC = I + Step +// guard(G(I)); +// } while (B(I.INC)); +// } +// +// where B(x) and G(x) are predicates that map integers to booleans, we want a +// loop invariant expression M such the following program has the same semantics +// as the above: +// +// if (B(Start)) { +// do { +// I = PHI(Start, I.INC) +// I.INC = I + Step +// guard(G(Start) && M); +// } while (B(I.INC)); +// } +// +// One solution for M is M = forall X . (G(X) && B(X + Step)) => G(X + Step) +// +// Informal proof that the transformation above is correct: +// +// By the definition of guards we can rewrite the guard condition to: +// G(I) && G(Start) && M +// +// Let's prove that for each iteration of the loop: +// G(Start) && M => G(I) +// And the condition above can be simplified to G(Start) && M. +// +// Induction base. +// G(Start) && M => G(Start) +// +// Induction step. Assuming G(Start) && M => G(I) on the subsequent +// iteration: +// +// B(I + Step) is true because it's the backedge condition. +// G(I) is true because the backedge is guarded by this condition. +// +// So M = forall X . (G(X) && B(X + Step)) => G(X + Step) implies +// G(I + Step). +// +// Note that we can use anything stronger than M, i.e. any condition which +// implies M. +// +// For now the transformation is limited to the following case: +// * The loop has a single latch with either ult or slt icmp condition. +// * The step of the IV used in the latch condition is 1. +// * The IV of the latch condition is the same as the post increment IV of the +// guard condition. +// * The guard condition is ult. +// +// In this case the latch is of the from: +// ++i u< latchLimit or ++i s< latchLimit +// and the guard is of the form: +// i u< guardLimit +// +// For the unsigned latch comparison case M is: +// forall X . X u< guardLimit && (X + 1) u< latchLimit => +// (X + 1) u< guardLimit +// +// This is true if latchLimit u<= guardLimit since then +// (X + 1) u< latchLimit u<= guardLimit == (X + 1) u< guardLimit. +// +// So the widened condition is: +// i.start u< guardLimit && latchLimit u<= guardLimit +// +// For the signed latch comparison case M is: +// forall X . X u< guardLimit && (X + 1) s< latchLimit => +// (X + 1) u< guardLimit +// +// The only way the antecedent can be true and the consequent can be false is +// if +// X == guardLimit - 1 +// (and guardLimit is non-zero, but we won't use this latter fact). +// If X == guardLimit - 1 then the second half of the antecedent is +// guardLimit s< latchLimit +// and its negation is +// latchLimit s<= guardLimit. +// +// In other words, if latchLimit s<= guardLimit then: +// (the ranges below are written in ConstantRange notation, where [A, B) is the +// set for (I = A; I != B; I++ /*maywrap*/) yield(I);) +// +// forall X . X u< guardLimit && (X + 1) s< latchLimit => (X + 1) u< guardLimit +// == forall X . X u< guardLimit && (X + 1) s< guardLimit => (X + 1) u< guardLimit +// == forall X . X in [0, guardLimit) && (X + 1) in [INT_MIN, guardLimit) => (X + 1) in [0, guardLimit) +// == forall X . X in [0, guardLimit) && X in [INT_MAX, guardLimit-1) => X in [-1, guardLimit-1) +// == forall X . X in [0, guardLimit-1) => X in [-1, guardLimit-1) +// == true +// +// So the widened condition is: +// i.start u< guardLimit && latchLimit s<= guardLimit +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" @@ -75,8 +189,16 @@ Loop *L; const DataLayout *DL; BasicBlock *Preheader; + LoopICmp LatchCheck; - Optional parseLoopICmp(ICmpInst *ICI); + Optional parseLoopICmp(ICmpInst *ICI) { + return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0), + ICI->getOperand(1)); + } + Optional parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS, + Value *RHS); + + Optional parseLoopLatchICmp(); Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, @@ -135,11 +257,8 @@ } Optional -LoopPredication::parseLoopICmp(ICmpInst *ICI) { - ICmpInst::Predicate Pred = ICI->getPredicate(); - - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); +LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS, + Value *RHS) { const SCEV *LHSS = SE->getSCEV(LHS); if (isa(LHSS)) return None; @@ -165,6 +284,8 @@ IRBuilder<> &Builder, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Instruction *InsertAt) { + // TODO: we can check isLoopEntryGuardedByCond before emitting the check + Type *Ty = LHS->getType(); assert(Ty == RHS->getType() && "expandCheck operands have different types?"); Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt); @@ -181,51 +302,54 @@ DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); DEBUG(ICI->dump()); + // parseLoopStructure guarantees that the latch condition is: + // ++i u< latchLimit or ++i s< latchLimit + // We are looking for the range checks of the form: + // i u< guardLimit auto RangeCheck = parseLoopICmp(ICI); if (!RangeCheck) { DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); return None; } - - ICmpInst::Predicate Pred = RangeCheck->Pred; - const SCEVAddRecExpr *IndexAR = RangeCheck->IV; - const SCEV *RHSS = RangeCheck->Limit; - - auto CanExpand = [this](const SCEV *S) { - return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); - }; - if (!CanExpand(RHSS)) + if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { + DEBUG(dbgs() << "Unsupported range check predicate(" << RangeCheck->Pred + << ")!\n"); return None; - - DEBUG(dbgs() << "IndexAR: "); - DEBUG(IndexAR->dump()); - - bool IsIncreasing = false; - if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing)) - return None; - - // If the predicate is increasing the condition can change from false to true - // as the loop progresses, in this case take the value on the first iteration - // for the widened check. Otherwise the condition can change from true to - // false as the loop progresses, so take the value on the last iteration. - const SCEV *NewLHSS = IsIncreasing - ? IndexAR->getStart() - : SE->getSCEVAtScope(IndexAR, L->getParentLoop()); - if (NewLHSS == IndexAR) { - DEBUG(dbgs() << "Can't compute NewLHSS!\n"); + } + auto *RangeCheckIV = RangeCheck->IV; + auto *PostIncRangeCheckIV = RangeCheckIV->getPostIncExpr(*SE); + if (LatchCheck.IV != PostIncRangeCheckIV) { + DEBUG(dbgs() << "Post increment range check IV (" << *PostIncRangeCheckIV + << ") is not the same as latch IV (" << *LatchCheck.IV + << ")!\n"); return None; } + assert(RangeCheckIV->getStepRecurrence(*SE)->isOne() && "must be one"); + const SCEV *Start = RangeCheckIV->getStart(); - DEBUG(dbgs() << "NewLHSS: "); - DEBUG(NewLHSS->dump()); + // Generate the widened condition. See the file header comment for reasoning. + // If the latch condition is unsigned: + // i.start u< guardLimit && latchLimit u<= guardLimit + // If the latch condition is signed: + // i.start u< guardLimit && latchLimit s<= guardLimit + + auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred) + ? ICmpInst::ICMP_SLE + : ICmpInst::ICMP_ULE; - if (!CanExpand(NewLHSS)) + auto CanExpand = [this](const SCEV *S) { + return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); + }; + if (!CanExpand(Start) || !CanExpand(LatchCheck.Limit) || + !CanExpand(RangeCheck->Limit)) return None; - DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n"); - Instruction *InsertAt = Preheader->getTerminator(); - return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt); + auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck->Pred, + Start, RangeCheck->Limit, InsertAt); + auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, + LatchCheck.Limit, RangeCheck->Limit, InsertAt); + return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, @@ -288,6 +412,59 @@ return true; } +Optional LoopPredication::parseLoopLatchICmp() { + using namespace PatternMatch; + + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + DEBUG(dbgs() << "The loop doesn't have a single latch!\n"); + return None; + } + + ICmpInst::Predicate Pred; + Value *LHS, *RHS; + BasicBlock *TrueDest, *FalseDest; + + if (!match(LoopLatch->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest, + FalseDest))) { + DEBUG(dbgs() << "Failed to match the latch terminator!\n"); + return None; + } + assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) && + "One of the latch's destinations must be the header"); + if (TrueDest != L->getHeader()) + Pred = ICmpInst::getInversePredicate(Pred); + + auto Result = parseLoopICmp(Pred, LHS, RHS); + if (!Result) { + DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); + return None; + } + + if (Result->Pred != ICmpInst::ICMP_ULT && + Result->Pred != ICmpInst::ICMP_SLT) { + DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred + << ")!\n"); + return None; + } + + // Check affine first, so if it's not we don't try to compute the step + // recurrence. + if (!Result->IV->isAffine()) { + DEBUG(dbgs() << "The induction variable is not affine!\n"); + return None; + } + + auto *Step = Result->IV->getStepRecurrence(*SE); + if (!Step->isOne()) { + DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); + return None; + } + + return Result; +} + bool LoopPredication::runOnLoop(Loop *Loop) { L = Loop; @@ -308,6 +485,11 @@ if (!Preheader) return false; + auto LatchCheckOpt = parseLoopLatchICmp(); + if (!LatchCheckOpt) + return false; + LatchCheck = *LatchCheckOpt; + // Collect all the guards into a vector and process later, so as not // to invalidate the instruction iterator. SmallVector Guards; Index: llvm/trunk/test/Transforms/LoopPredication/basic.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/basic.ll +++ llvm/trunk/test/Transforms/LoopPredication/basic.ll @@ -11,8 +11,9 @@ loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -46,8 +47,9 @@ loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -73,44 +75,35 @@ ret i32 %result } - -define i32 @two_range_checks(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, i32 %n) { -; CHECK-LABEL: @two_range_checks +define i32 @signed_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_ult_check entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - %within.bounds.1 = icmp ult i32 %i, %length.1 - %within.bounds.2 = icmp ult i32 %i, %length.2 - %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.1, %array.2.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -118,52 +111,33 @@ ret i32 %result } -define i32 @three_range_checks(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, - i32* %array.3, i32 %length.3, i32 %n) { -; CHECK-LABEL: @three_range_checks +define i32 @unsupported_latch_pred_loop_0_to_n(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @unsupported_latch_pred_loop_0_to_n entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} -; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] -; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: %within.bounds = icmp ult i32 %i, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - %within.bounds.1 = icmp ult i32 %i, %length.1 - %within.bounds.2 = icmp ult i32 %i, %length.2 - %within.bounds.3 = icmp ult i32 %i, %length.3 - %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 - %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.2 = add i32 %loop.acc.1, %array.2.i - - %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 - %array.3.i = load i32, i32* %array.3.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i - %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %i.next = add nsw i32 %i, 1 + %continue = icmp ne i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -171,56 +145,33 @@ ret i32 %result } -define i32 @three_guards(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, - i32* %array.3, i32 %length.3, i32 %n) { -; CHECK-LABEL: @three_guards +define i32 @signed_loop_0_to_n_unsupported_iv_step(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_unsupported_iv_step entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1 -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2 -; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3 ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ] - +; CHECK: %within.bounds = icmp ult i32 %i, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - - %within.bounds.1 = icmp ult i32 %i, %length.1 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ] + %within.bounds = icmp ult i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %within.bounds.2 = icmp ult i32 %i, %length.2 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ] - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.2 = add i32 %loop.acc.1, %array.2.i - - %within.bounds.3 = icmp ult i32 %i, %length.3 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ] - - %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 - %array.3.i = load i32, i32* %array.3.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i - %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %i.next = add nsw i32 %i, 2 + %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -228,15 +179,17 @@ ret i32 %result } -define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check +define i32 @signed_loop_0_to_n_equal_iv_range_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_equal_iv_range_check entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0 +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -244,8 +197,10 @@ ; CHECK: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds = icmp sge i32 %i, 0 + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %loop ], [ 0, %loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -253,6 +208,7 @@ %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i + %j.next = add nsw i32 %j, 1 %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit @@ -262,28 +218,26 @@ ret i32 %result } -define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check +define i32 @signed_loop_0_to_n_unrelated_iv_range_check(i32* %array, i32 %start, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_unrelated_iv_range_check entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1 -; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] -; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: %within.bounds = icmp ult i32 %j, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds = icmp slt i32 %i, %length + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %loop ], [ %start, %loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -291,6 +245,7 @@ %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i + %j.next = add nsw i32 %j, 1 %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit @@ -300,41 +255,166 @@ ret i32 %result } -define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_both_checks +define i32 @two_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, i32 %n) { +; CHECK-LABEL: @two_range_checks entry: - %tmp5 = icmp sle i32 %n, 0 + %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0 -; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1 -; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] -; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 -; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds.1 = icmp slt i32 %i, %length - %within.bounds.2 = icmp sge i32 %i, 0 + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 %within.bounds = and i1 %within.bounds.1, %within.bounds.2 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 - %array.i = load i32, i32* %array.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc, %array.i + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i - %i.next = add nsw i32 %i, 1 - %continue = icmp slt i32 %i.next, %n + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.2.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_range_checks +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] +; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds.3 = icmp ult i32 %i, %length.3 + %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_guards(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_guards +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] +; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ] + + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + + %within.bounds.1 = icmp ult i32 %i, %length.1 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %within.bounds.2 = icmp ult i32 %i, %length.2 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ] + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %within.bounds.3 = icmp ult i32 %i, %length.3 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ] + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -350,8 +430,9 @@ loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -439,12 +520,12 @@ loop: ; CHECK: loop: ; CHECK: %bound = add i32 %i, %x -; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound +; CHECK-NEXT: %within.bounds = icmp ult i32 %i, %bound ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] %bound = add i32 %i, %x - %within.bounds = icmp slt i32 %i, %bound + %within.bounds = icmp ult i32 %i, %bound call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -503,9 +584,10 @@ loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[length:[^ ]+]] = zext i16 %length.i16 to i32 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], [[length]] +; CHECK: [[length:[^ ]+]] = zext i16 %length.i16 to i32 +; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, [[length]] +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, [[length]] +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop Index: llvm/trunk/test/Transforms/LoopPredication/nested.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/nested.ll +++ llvm/trunk/test/Transforms/LoopPredication/nested.ll @@ -10,8 +10,6 @@ br i1 %tmp5, label %exit, label %outer.loop.preheader outer.loop.preheader: -; CHECK: outer.loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1 br label %outer.loop outer.loop: @@ -22,7 +20,10 @@ inner.loop.preheader: ; CHECK: inner.loop.preheader: -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %l, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %inner.loop br label %inner.loop inner.loop: @@ -31,7 +32,7 @@ %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %j, %length + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %j.i64 = zext i32 %j to i64 @@ -62,8 +63,10 @@ outer.loop.preheader: ; CHECK: outer.loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %outer.loop br label %outer.loop outer.loop: @@ -82,7 +85,7 @@ %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %i, %length + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -112,14 +115,15 @@ br i1 %tmp5, label %exit, label %outer.loop.preheader outer.loop.preheader: +; CHECK: outer.loop.preheader: +; CHECK-NEXT: [[first_iteration_check_outer:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check_outer:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond_outer:[^ ]+]] = and i1 [[first_iteration_check_outer]], [[limit_check_outer]] +; CHECK-NEXT: br label %outer.loop br label %outer.loop outer.loop: ; CHECK: outer.loop: -; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1 -; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]] -; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1 %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %tmp6 = icmp sle i32 %l, 0 @@ -127,16 +131,69 @@ inner.loop.preheader: ; CHECK: inner.loop.preheader: -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length +; CHECK: [[limit_check_inner:[^ ]+]] = icmp sle i32 %l, %length +; CHECK: br label %inner.loop br label %inner.loop inner.loop: ; CHECK: inner.loop: +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[limit_check_inner]], [[wide_cond_outer]] ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %j, %length + %within.bounds = icmp ult i32 %j, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %j.i64 = zext i32 %j to i64 + %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64 + %array.j = load i32, i32* %array.j.ptr, align 4 + %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j + + %j.next = add nsw i32 %j, 1 + %inner.continue = icmp slt i32 %j.next, %l + br i1 %inner.continue, label %inner.loop, label %outer.loop.inc + +outer.loop.inc: + %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] + %i.next = add nsw i32 %i, 1 + %outer.continue = icmp slt i32 %i.next, %n + br i1 %outer.continue, label %outer.loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] + ret i32 %result +} + +define i32 @cant_expand_guard_check_start(i32* %array, i32 %length, i32 %n, i32 %l, i32 %maybezero) { +; CHECK-LABEL: @cant_expand_guard_check_start +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %outer.loop.preheader + +outer.loop.preheader: + br label %outer.loop + +outer.loop: + %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %tmp6 = icmp sle i32 %l, 0 + %div = udiv i32 %i, %maybezero + br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader + +inner.loop.preheader: +; CHECK: inner.loop.preheader: +; CHECK: br label %inner.loop + br label %inner.loop + +inner.loop: +; CHECK: inner.loop: +; CHECK: %within.bounds = icmp ult i32 %j, %length +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] + %j = phi i32 [ %j.next, %inner.loop ], [ %div, %inner.loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %j.i64 = zext i32 %j to i64 Index: llvm/trunk/test/Transforms/LoopPredication/visited.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/visited.ll +++ llvm/trunk/test/Transforms/LoopPredication/visited.ll @@ -11,8 +11,9 @@ loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop