Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -124,6 +124,11 @@ DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization")); +static cl::opt<bool> +LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false), + cl::desc("Predicate conditions in read only loops")); + + namespace { struct RewritePhi; @@ -144,7 +149,7 @@ bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); - bool optimizeLoopExits(Loop *L); + bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); @@ -2641,7 +2646,7 @@ return MadeAnyChanges; } -bool IndVarSimplify::optimizeLoopExits(Loop *L) { +bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -2719,8 +2724,7 @@ assert(MaxExitCount->getType() == ExitCount->getType()); // Can we prove that some other exit must be taken strictly before this - // one? TODO: handle cases where ule is known, and equality is covered - // by a dominating exit + // one? if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxExitCount, ExitCount)) { bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); @@ -2737,14 +2741,136 @@ // TODO: If we can prove that the exiting iteration is equal to the exit // count for this exit and that no previous exit opportunities exist within // the loop, then we can discharge all other exits. (May fall out of - // previous TODO.) - - // TODO: If we can't prove any relation between our exit count and the - // loops exit count, but taking this exit doesn't require actually running - // the loop (i.e. no side effects, no computed values used in exit), then - // we can replace the exit test with a loop invariant test which exits on - // the first iteration. + // previous TODO.) } + + // Finally, see if we can rewrite our exit conditions into a loop invariant + // form. If we have a read-only loop, and we can tell that we must exit down + // a path which does not need any of the values computed within the loop, we + // can rewrite the loop to exit on the first iteration. Note that this + // doesn't tell us either a) that the loop exits on the first iteration (unless + // *all* exits are predicatable) or b) *which* exit might be taken. + // This transformation looks a lot like a form of dead loop + // elimination, but restricted to read-only loops and without necessarily + // needing to kill the loop entirely. + if (!LoopPredication) + return Changed; + + if (!SE->hasLoopInvariantBackedgeTakenCount(L)) + return Changed; + + // Note: ExactBTC is the exact backedge taken count *iff* the loop exits + // through *explicit* control flow. We have to eliminate the possibility of + // implicit exits (see below) before we know it's truly exact. + const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(ExactBTC) || + !SE->isLoopInvariant(ExactBTC, L) || + !isSafeToExpand(ExactBTC, *SE)) + return Changed; + + auto Filter = [&](BasicBlock *ExitingBB) { + // If our exiting block exits multiple loops, we can only rewrite the + // innermost one. Otherwise, we're changing how many times the innermost + // loop runs before it exits. 
+ if (LI->getLoopFor(ExitingBB) != L) + return true; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!BI) + return true; + + // If already constant, nothing to do. + if (isa<Constant>(BI->getCondition())) + return true; + + // If the exit block has phis, we need to be able to compute the values + // within the loop which contains them. This assumes trivially LCSSA phis + // have already been removed; TODO: generalize + BasicBlock *ExitBlock = + BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); + if (!empty(ExitBlock->phis())) + return true; + + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count"); + if (!SE->isLoopInvariant(ExitCount, L) || + !isSafeToExpand(ExitCount, *SE)) + return true; + + return false; + }; + auto Erased = std::remove_if(ExitingBlocks.begin(), ExitingBlocks.end(), + Filter); + ExitingBlocks.erase(Erased, ExitingBlocks.end()); + + if (ExitingBlocks.empty()) + return Changed; + + // We rely on not being able to reach an exiting block on a later iteration + // than its statically computed exit count. The implementation of + // getExitCount currently has this invariant, but assert it here so that + // breakage is obvious if this ever changes. + assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { + return DT->dominates(ExitingBB, L->getLoopLatch()); + })); + + // At this point, ExitingBlocks consists of only those blocks which are + // predicatable. Given that, we know we have at least one exit we can + // predicate if the loop doesn't have side effects and doesn't have any + // implicit exits (because then our exact BTC isn't actually exact). + // @Reviewers - As structured, this is O(I^2) for loop nests. Any + // suggestions on how to improve this? I can obviously bail out for outer + // loops, but that seems less than ideal. MemorySSA can find memory writes, + // is that enough for *all* side effects? + for (BasicBlock *BB : L->blocks()) + for (auto &I : *BB) + // TODO: isGuaranteedToTransfer + if (I.mayHaveSideEffects() || I.mayThrow()) + return Changed; + + // Finally, do the actual predication for all predicatable blocks. A couple + // of notes here: + // 1) We don't bother to constant fold dominated exits with identical exit + // counts; that's simply a form of CSE/equality propagation and we leave + // it for dedicated passes. + // 2) We insert the comparison at the branch. Hoisting introduces additional + // legality constraints and we leave that to dedicated logic. We want to + // predicate even if we can't insert a loop invariant expression, as + // peeling or unrolling will likely reduce the cost of the otherwise loop + // varying check. + Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); + IRBuilder<> B(L->getLoopPreheader()->getTerminator()); + Value *ExactBTCV = nullptr; // lazily generated if needed + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + + auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); + Value *NewCond; + if (ExitCount == ExactBTC) { + NewCond = L->contains(BI->getSuccessor(0)) ? 
+ B.getFalse() : B.getTrue(); + } else { + Value *ECV = Rewriter.expandCodeFor(ExitCount); + if (!ExactBTCV) + ExactBTCV = Rewriter.expandCodeFor(ExactBTC); + Value *RHS = ExactBTCV; + if (ECV->getType() != RHS->getType()) { + Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); + ECV = B.CreateZExt(ECV, WiderTy); + RHS = B.CreateZExt(RHS, WiderTy); + } + auto Pred = L->contains(BI->getSuccessor(0)) ? + ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + NewCond = B.CreateICmp(Pred, ECV, RHS); + } + Value *OldCond = BI->getCondition(); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.push_back(OldCond); + Changed = true; + } + return Changed; } @@ -2804,7 +2930,7 @@ // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); - Changed |= optimizeLoopExits(L); + Changed |= optimizeLoopExits(L, Rewriter); // If we have a trip count expression, rewrite the loop's exit condition // using it. Index: llvm/trunk/test/Transforms/IndVarSimplify/loop-predication.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/loop-predication.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/loop-predication.ll @@ -0,0 +1,790 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -indvars -indvars-predicate-loops=1 -S | FileCheck %s + +declare void @prevent_merging() + +; Base case +define i32 @test1(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* 
%array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; Has side effect which must be reflected +define i32 @neg_store(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @neg_store( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + store i32 0, i32* %array.i.ptr + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +declare void @maythrow() + +; May exit through implicit exception edge +define i32 @neg_implicit_exit(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @neg_implicit_exit( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @maythrow() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: 
[[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @maythrow() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + + + +; Base case, but in LFTR form (just for sanity checking) +define i32 @test2(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP0]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[LENGTH]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ne i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ne i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, 
%guarded ] + ret i32 %result +} + +; br (and rcheck1, rcheck2) +define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32 %n) { +; CHECK-LABEL: @two_range_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH_2]], [[LENGTH_1]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[UMIN1]], [[TMP3]] +; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[TMP4]], i32 [[UMIN1]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.2.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: 
@three_range_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_3:%.*]], [[LENGTH_2:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN]], [[LENGTH_1:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[UMIN]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH_3]], [[LENGTH_2]] +; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[TMP2]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN2]], [[LENGTH_1]] +; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[TMP3]], i32 [[UMIN2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP4]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[UMIN3]], [[TMP5]] +; CHECK-NEXT: [[UMIN4:%.*]] = select i1 [[TMP6]], i32 [[UMIN3]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds.3 = icmp ult i32 %i, %length.3 + %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %array.2.i.ptr = getelementptr inbounds i32, i32* 
%array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; Analogous to the above, but with two distinct branches (on different conditions) +define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @distinct_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded1: +; CHECK-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded4, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 
%loop.acc, %array.1.i + %within.bounds.2 = icmp ult i32 %i, %length.2 + br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void @prevent_merging() + ret i32 -1 + +guarded1: ; preds = %guarded1 + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded1 ] + ret i32 %result +} + +define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %length, i32 %n) { +; CHECK-LABEL: @duplicate_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded1: +; CHECK-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded4, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length + br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %within.bounds.2 = icmp ult i32 %i, %length + br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void 
@prevent_merging() + ret i32 -1 + +guarded1: ; preds = %guarded1 + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded1 ] + ret i32 %result +} + + +define i32 @provably_taken(i32* %array, i32* %length.ptr) { +; CHECK-LABEL: @provably_taken( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 false, label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: br i1 true, label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: + %length = load i32, i32* %length.ptr, !range !2 + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp slt i32 %i.next, 200 + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; Non-latch exits can still be predicated +define i32 @unconditional_latch(i32* %a, i32 %length) { +; CHECK-LABEL: @unconditional_latch( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 false, label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: br label [[LOOP]] +; +loop.preheader: + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %i = phi i32 [ %i.next, %guarded ], [ 400, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.next = add i32 %i, 1 + br label %loop +} + +; Side effect in loop must run proper number of times +define i32 @unconditional_latch_with_side_effect(i32* %a, i32 %length) { +; CHECK-LABEL: @unconditional_latch_with_side_effect( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: 
[[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED:%.*]] ], [ 400, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: store volatile i32 0, i32* [[A:%.*]] +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 +; CHECK-NEXT: br label [[LOOP]] +; +loop.preheader: + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %i = phi i32 [ %i.next, %guarded ], [ 400, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + store volatile i32 0, i32* %a + %i.next = add i32 %i, 1 + br label %loop +} + +; Demonstrate that this approach works with IVs of different steps, and types +; This version uses a manually lftred exit condition to work around an issue described +; in detail on next test. +define i32 @different_ivs(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @different_ivs( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N64]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N64]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP3]], i64 [[TMP1]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: + %j.start = sub nuw nsw i32 %length, 1 + %n64 = zext i32 %n to i64 + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i64 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %guarded ], [ %j.start, %loop.preheader ] + %within.bounds = icmp ne i32 %j, -1 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: + call void @prevent_merging() + ret i32 -1 + +guarded: + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i64 %i, 1 + %j.next = sub nuw i32 %j, 1 + %continue = icmp ult i64 %i.next, %n64 + br i1 
%continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; TODO: We're failing to compute an exit count for the bounds check. +; From some quick analysis, it looks like we don't handle -1 step +; in howManyLessThans. Should be a simple fix. +define i32 @different_ivs2(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @different_ivs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POS_LENGTH:%.*]] = icmp sgt i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: br i1 [[POS_LENGTH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[J_START:%.*]] = sub nuw nsw i32 [[LENGTH]], 1 +; CHECK-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[GUARDED]] ], [ [[J_START]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[J]], [[LENGTH]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[J_NEXT]] = sub nuw i32 [[J]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[LOOP_ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_ACC_NEXT_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %pos_length = icmp sgt i32 %length, 0 + br i1 %pos_length, label %loop.preheader, label %exit + +loop.preheader: + %j.start = sub nuw nsw i32 %length, 1 + %n64 = zext i32 %n to i64 + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i64 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %guarded ], [ %j.start, %loop.preheader ] + %within.bounds = icmp ult i32 %j, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: + call void @prevent_merging() + ret i32 -1 + +guarded: + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i64 %i, 1 + %j.next = sub nuw i32 %j, 1 + %continue = icmp ult i64 %i.next, %n64 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ], [0, %entry] + ret i32 %result +} + + + +declare i32 @llvm.experimental.deoptimize.i32(...) + +!0 = !{!"branch_weights", i32 1048576, i32 1} +!1 = !{i32 1, i32 -2147483648} +!2 = !{i32 0, i32 50}
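
; Illustrative sketch (not part of the committed patch or the autogenerated
; checks above): distilled from the @test1 CHECK lines, this is roughly the
; shape that -indvars-predicate-loops=1 produces for a read-only loop. The
; guard's exit count (%length) and the exact backedge-taken count
; (umin(%length, umax(%n, 1) - 1)) are expanded once in the preheader, and the
; in-loop guard branches on the resulting loop-invariant bit instead of the
; old per-iteration "icmp ult %i, %length". The function name
; @predicated_shape and the value names are hypothetical.
define i32 @predicated_shape(i32* %array, i32 %length, i32 %n) {
loop.preheader:
  %umax.cmp = icmp ugt i32 %n, 1
  %umax = select i1 %umax.cmp, i32 %n, i32 1              ; umax(%n, 1)
  %latch.ec = add i32 %umax, -1                           ; latch exit count
  %umin.cmp = icmp ult i32 %length, %latch.ec
  %umin = select i1 %umin.cmp, i32 %length, i32 %latch.ec ; exact backedge-taken count
  %inv.cond = icmp ne i32 %length, %umin                  ; guard exit count != exact BTC
  br label %loop

loop:
  %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
  ; Loop-invariant guard: true iff the latch exit is reached strictly before
  ; the bounds check would ever fail.
  br i1 %inv.cond, label %guarded, label %deopt

deopt:
  ret i32 -1

guarded:
  %i.i64 = zext i32 %i to i64
  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
  %array.i = load i32, i32* %array.i.ptr, align 4
  %i.next = add nuw i32 %i, 1
  %continue = icmp ult i32 %i.next, %n
  br i1 %continue, label %loop, label %exit

exit:
  ret i32 %array.i
}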