Index: llvm/lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -248,7 +248,9 @@ class LoopPredication { AliasAnalysis *AA; + DominatorTree *DT; ScalarEvolution *SE; + LoopInfo *LI; BranchProbabilityInfo *BPI; Loop *L; @@ -300,10 +302,13 @@ // within the loop. We identify such unprofitable loops through BPI. bool isLoopProfitableToPredicate(); + bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); + public: - LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE, + LoopPredication(AliasAnalysis *AA, DominatorTree *DT, + ScalarEvolution *SE, LoopInfo *LI, BranchProbabilityInfo *BPI) - : AA(AA), SE(SE), BPI(BPI){}; + : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {}; bool runOnLoop(Loop *L); }; @@ -323,10 +328,12 @@ if (skipLoop(L)) return false; auto *SE = &getAnalysis().getSE(); + auto *LI = &getAnalysis().getLoopInfo(); + auto *DT = &getAnalysis().getDomTree(); BranchProbabilityInfo &BPI = getAnalysis().getBPI(); auto *AA = &getAnalysis().getAAResults(); - LoopPredication LP(AA, SE, &BPI); + LoopPredication LP(AA, DT, SE, LI, &BPI); return LP.runOnLoop(L); } }; @@ -352,7 +359,7 @@ AM.getResult(L, AR).getManager(); Function *F = L.getHeader()->getParent(); auto *BPI = FAM.getCachedResult(*F); - LoopPredication LP(&AR.AA, &AR.SE, BPI); + LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, BPI); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -953,6 +960,190 @@ return true; } +/// If we can (cheaply) find a widenable branch which controls entry into the +/// loop, return it. +static BranchInst *FindWideableTerminatorAboveLoop(Loop *L, LoopInfo &LI) { + // Walk back through any unconditional executed blocks and see if we can find + // a widenable condition which seems to control execution of this loop. Note + // that we predict that maythrow calls are likely untaken and thus that it's + // profitable to widen a branch before a maythrow call with a condition + // afterwards even though that may cause the slow path to run in a case where + // it wouldn't have otherwise. + BasicBlock *BB = L->getLoopPreheader(); + if (!BB) + return nullptr; + do { + if (BasicBlock *Pred = BB->getSinglePredecessor()) + if (BB == Pred->getSingleSuccessor()) { + BB = Pred; + continue; + } + break; + } while (true); + + if (BasicBlock *Pred = BB->getSinglePredecessor()) { + auto *Term = Pred->getTerminator(); + + Value *Cond, *WC; + BasicBlock *IfTrueBB, *IfFalseBB; + if (parseWidenableBranch(Term, Cond, WC, + IfTrueBB, IfFalseBB) && + IfTrueBB == BB) + return cast(Term); + } + return nullptr; +} + +/// Return the minimum of all analyzeable exit counts. This is an upper bound +/// on the actual exit count. If there are not at least two analyzeable exits, +/// returns SCEVCouldNotCompute. +static const SCEV* +getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE, + DominatorTree &DT, Loop *L) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + SmallVector ExitCounts; + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); + if (isa(ExitCount)) + continue; + assert(DT.dominates(ExitingBB, L->getLoopLatch()) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + } + if (ExitCounts.size() < 2) + return SE.getCouldNotCompute(); + return SE.getUMinFromMismatchedTypes(ExitCounts); +} + + +/// This implements an analogous, but entirely distinct transform from the main +/// loop predication transform. This one is phrased in terms of using a +/// widenable branch *outside* the loop to allow us to simplify loop exits in a +/// following loop. This is closer is spirit to the IndVarSimplify transform +/// of the same name, but is materially different widening loosens legality +/// sharply. +bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { + // The transformation performed here aims to widen a widenable condition + // above the loop such that all analyzeable exit leading to deopt are dead. + // It assumes that the latch is the dominant exit for profitability and that + // exits branching to deoptimizing blocks are rarely taken. It relies on the + // semantics of widenable expressions for legality. (i.e. being able to fall + // down the widenable path spuriously allows us to ignore exit order, + // unanalyzeable exits, side effects, exceptional exits, and other challenges + // which restrict the applicability of the non-WC based version of this + // transform in IndVarSimplify.) + // + // NOTE ON POISON/UNDEF - There's a latent problem around undef/poison here. + // We're hoisting an expression above guards which may imply flags on the + // expression being hoisted and inserting new uses (flags are only correct + // for current uses). The result is that we may be inserting a branch on the + // value which can be either poison or undef. This is *exactly* the same + // problem which exists in unswitch. The long term solution is the same: the + // new freeze instruction. In the meantime, this relies on the same + // conservatism in the optimizer that unswitch does for correctness. + + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + if (ExitingBlocks.empty()) + return false; // Nothing to do. + + /* nullable */ auto *Latch = L->getLoopLatch(); + + auto *WidenableBR = FindWideableTerminatorAboveLoop(L, *LI); + if (!WidenableBR) + return false; + + // The use of umin(all analyzeable exits) instead of latch is subtle, but + // important for profitability. We may have a loop which hasn't been fully + // canonicalized just yet. If the exit we chose to widen is provably never + // taken, we want the widened form to *also* be provably never taken. We + // can't guarantee this as a current unanalyzeable exit may later become + // analyzeable, but we can at least avoid the obvious cases. + const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L); + if (isa(MinEC) || + MinEC->getType()->isPointerTy() || + !SE->isLoopInvariant(MinEC, L) || + !isSafeToExpandAt(MinEC, WidenableBR, *SE)) + return false; + + Rewriter.setInsertPoint(WidenableBR); + IRBuilder<> B(WidenableBR); + + bool Changed = false; + Value *MinECV = nullptr; //lazy generated if needed + for (BasicBlock *ExitingBB : ExitingBlocks) { + // If our exiting block exits multiple loops, we can only rewrite the + // innermost one. Otherwise, we're changing how many times the innermost + // loop runs before it exits. + if (LI->getLoopFor(ExitingBB) != L) + continue; + + // Can't rewrite non-branch yet. + auto *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + continue; + + // If already constant, nothing to do. + if (isa(BI->getCondition())) + continue; + + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + if (isa(ExitCount) || + ExitCount->getType()->isPointerTy() || + !isSafeToExpandAt(ExitCount, WidenableBR, *SE)) + continue; + + const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); + BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1); + if (!ExitBB->getTerminatingDeoptimizeCall()) + // Profitability: indicator of rarely/never taken exit + continue; + + // If we found a widenable exit condition, do two things: + // 1) fold the widened exit test into the widenable condition + // 2) fold the branch to untaken - avoids infinite looping + + Value *ECV = Rewriter.expandCodeFor(ExitCount); + if (!MinECV) + MinECV = Rewriter.expandCodeFor(MinEC); + Value *RHS = MinECV; + if (ECV->getType() != RHS->getType()) { + Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); + ECV = B.CreateZExt(ECV, WiderTy); + RHS = B.CreateZExt(RHS, WiderTy); + } + assert(!Latch || DT->dominates(ExitingBB, Latch)); + Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS); + // TODO: NewCond = B.CreateFreeze(NewCond) once freeze inst submitted. + // See NOTE ON POISON/UNDEF above for context. + + Value *Cond, *WC; + BasicBlock *IfTrueBB, *IfFalseBB; + bool Success = parseWidenableBranch(WidenableBR, Cond, WC, + IfTrueBB, IfFalseBB); + assert(Success && "implied from above"); + NewCond = B.CreateAnd(B.CreateAnd(NewCond, Cond), WC); + WidenableBR->setCondition(NewCond); + + Value *OldCond = BI->getCondition(); + BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue)); + Changed = true; + } + + if (Changed) + // We just mutated a bunch of loop exits changing there exit counts + // widely. We need to force recomputation of the exit counts given these + // changes. Note that all of the inserted exits are never taken, and + // should be removed next time the CFG is modified. + SE->forgetLoop(L); + return Changed; +} + + bool LoopPredication::runOnLoop(Loop *Loop) { L = Loop; @@ -1004,16 +1195,14 @@ cast(BB->getTerminator())); } - if (Guards.empty() && GuardsAsWidenableBranches.empty()) - return false; - SCEVExpander Expander(*SE, *DL, "loop-predication"); - bool Changed = false; - for (auto *Guard : Guards) - Changed |= widenGuardConditions(Guard, Expander); - for (auto *Guard : GuardsAsWidenableBranches) - Changed |= widenWidenableBranchGuardConditions(Guard, Expander); - + if (!Guards.empty() || !GuardsAsWidenableBranches.empty()) { + for (auto *Guard : Guards) + Changed |= widenGuardConditions(Guard, Expander); + for (auto *Guard : GuardsAsWidenableBranches) + Changed |= widenWidenableBranchGuardConditions(Guard, Expander); + } + Changed |= predicateLoopExits(L, Expander); return Changed; } Index: llvm/test/Transforms/LoopPredication/predicate-exits.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopPredication/predicate-exits.ll @@ -0,0 +1,757 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-predication -S | FileCheck %s + +declare void @prevent_merging() + +; Base case - with side effects in loop +define i32 @test1(i32* %array, i32 %length, i32 %n, i1 %cond_0) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[TMP3]], [[COND_0]] +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + + + +define i32 @test_non_canonical(i32* %array, i32 %length, i1 %cond_0) { +; CHECK-LABEL: @test_non_canonical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[LENGTH:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[LENGTH]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[TMP3]], [[COND_0]] +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[LENGTH]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %length + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + + +define i32 @test_two_range_checks(i32* %array, i32 %length.1, i32 %length.2, i32 %n, i1 %cond_0) { +; CHECK-LABEL: @test_two_range_checks( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0]] +; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH_1]] +; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH_2]] +; CHECK-NEXT: br i1 true, label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK: deopt3: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET3]] +; CHECK: guarded2: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED2]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded2 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded2 ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length.1 + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %within.bounds2 = icmp ult i32 %i, %length.2 + br i1 %within.bounds2, label %guarded2, label %deopt3, !prof !0 + +deopt3: + call void @unknown() + %deoptret3 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret3 + +guarded2: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded2 ] + ret i32 %result +} + +@G = external global i32 + +define i32 @test_unanalyzeable_exit(i32* %array, i32 %length, i32 %n, i1 %cond_0) { +; CHECK-LABEL: @test_unanalyzeable_exit( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 +; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK: deopt3: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET3]] +; CHECK: guarded2: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED2]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded2 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded2 ], [ 0, %loop.preheader ] + call void @unknown() + %vol = load volatile i32, i32* @G + %unknown = icmp eq i32 %vol, 0 + br i1 %unknown, label %guarded2, label %deopt3, !prof !0 + +deopt3: + call void @unknown() + %deoptret3 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret3 + +guarded2: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded2 ] + ret i32 %result +} + +define i32 @test_unanalyzeable_exit2(i32* %array, i32 %length, i32 %n, i1 %cond_0) { +; CHECK-LABEL: @test_unanalyzeable_exit2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[TMP3]], [[COND_0]] +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 +; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK: deopt3: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET3]] +; CHECK: guarded2: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED2]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded2 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded2 ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %vol = load volatile i32, i32* @G + %unknown = icmp eq i32 %vol, 0 + br i1 %unknown, label %guarded2, label %deopt3, !prof !0 + +deopt3: + call void @unknown() + %deoptret3 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret3 + +guarded2: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded2 ] + ret i32 %result +} + + +define i32 @test_unanalyzeable_latch(i32* %array, i32 %length, i32 %n, i1 %cond_0) { +; CHECK-LABEL: @test_unanalyzeable_latch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 +; CHECK-NEXT: br i1 [[UNKNOWN]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %vol = load volatile i32, i32* @G + %unknown = icmp eq i32 %vol, 0 + br i1 %unknown, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + + +define i32 @provably_taken(i32* %array, i1 %cond_0) { +; CHECK-LABEL: @provably_taken( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 false, [[COND_0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], 198 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], 200 +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, 198 + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, 200 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +define i32 @provably_not_taken(i32* %array, i1 %cond_0) { +; CHECK-LABEL: @provably_not_taken( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 true, [[COND_0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], 205 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], 200 +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, 205 + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, 200 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + + + + + + + + +; Non-latch exits can still be predicated +define i32 @unconditional_latch(i32* %array, i32 %length, i1 %cond_0) { +; CHECK-LABEL: @unconditional_latch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + br label %loop +} + + +declare void @unknown() + +declare i1 @llvm.experimental.widenable.condition() +declare i32 @llvm.experimental.deoptimize.i32(...) + +!0 = !{!"branch_weights", i32 1048576, i32 1} +!1 = !{i32 1, i32 -2147483648} +!2 = !{i32 0, i32 50}