Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -144,6 +144,7 @@ bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); + bool optimizeLoopExits(Loop *L); bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); @@ -2645,6 +2646,118 @@ return MadeAnyChanges; } +bool IndVarSimplify::optimizeLoopExits(Loop *L) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + BasicBlock * const Latch = L->getLoopLatch(); + + // Form an expression for the maximum exit count possible for this loop. We + // merge the max and exact information to approximate a version of + // getMaxBackedgeTakenInfo which isn't restricted to just constants. + // TODO: factor this out as a version of getMaxBackedgeTakenCount which + // isn't guaranteed to return a constant. + SmallVector ExitCounts; + const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L); + if (!isa(MaxConstEC)) + ExitCounts.push_back(MaxConstEC); + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + if (!isa(ExitCount)) { + assert(DT->dominates(ExitingBB, Latch) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + } + } + if (ExitCounts.empty()) + return false; + const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts); + + bool Changed = false; + for (BasicBlock *ExitingBB : ExitingBlocks) { + // If our exitting block exits multiple loops, we can only rewrite the + // innermost one. Otherwise, we're changing how many times the innermost + // loop runs before it exits. + if (LI->getLoopFor(ExitingBB) != L) + continue; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + continue; + + // If already constant, nothing to do. + if (isa((BI->getCondition()))) + continue; + + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + if (isa(ExitCount)) + continue; + + // If we know we'd exit on the first iteration, rewrite the exit to + // reflect this. This does not imply the loop must exit through this + // exit; there may be an earlier one taken on the first iteration. + // TODO: Given we know the backedge can't be taken, we should go ahead + // and break it. Or at least, kill all the header phis and simplify. + if (ExitCount->isZero()) { + auto *BI = cast(ExitingBB->getTerminator()); + bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); + auto *OldCond = BI->getCondition(); + auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) : + ConstantInt::getFalse(OldCond->getType()); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.push_back(OldCond); + Changed = true; + continue; + } + + // If we end up with a pointer exit count, bail. + if (!ExitCount->getType()->isIntegerTy() || + !MaxExitCount->getType()->isIntegerTy()) + return false; + + Type *WiderType = + SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); + ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); + MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); + assert(MaxExitCount->getType() == ExitCount->getType()); + + // Can we prove that some other exit must be taken strictly before this + // one? TODO: handle cases where ule is known, and equality is covered + // by a dominating exit + if (SE->isKnownPredicate(CmpInst::ICMP_ULT, MaxExitCount, ExitCount) || + SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, + MaxExitCount, ExitCount) || + (SE->isKnownNonZero(ExitCount) && + SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, + MaxExitCount, ExitCount))) { + auto *BI = cast(ExitingBB->getTerminator()); + bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); + auto *OldCond = BI->getCondition(); + auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) : + ConstantInt::getTrue(OldCond->getType()); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.push_back(OldCond); + Changed = true; + continue; + } + + // TODO: If we can prove that the exiting iteration is equal to the exit + // count for this exit and that no previous exit oppurtunities exist within + // the loop, then we can discharge all other exits. (May fall out of + // previous TODO.) + + // TODO: If we can't prove any relation between our exit count and the + // loops exit count, but taking this exit doesn't require actually running + // the loop (i.e. no side effects, no computed values used in exit), then + // we can replace the exit test with a loop invariant test which exits on + // the first iteration. + } + return Changed; +} + //===----------------------------------------------------------------------===// // IndVarSimplify driver. Manage several subpasses of IV simplification. //===----------------------------------------------------------------------===// @@ -2701,6 +2814,8 @@ // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + Changed |= optimizeLoopExits(L); + // If we have a trip count expression, rewrite the loop's exit condition // using it. if (!DisableLFTR) { @@ -2724,23 +2839,7 @@ if (isa(ExitCount)) continue; - // If we know we'd exit on the first iteration, rewrite the exit to - // reflect this. This does not imply the loop must exit through this - // exit; there may be an earlier one taken on the first iteration. - // TODO: Given we know the backedge can't be taken, we should go ahead - // and break it. Or at least, kill all the header phis and simplify. - if (ExitCount->isZero()) { - auto *BI = cast(ExitingBB->getTerminator()); - bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); - auto *OldCond = BI->getCondition(); - auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) : - ConstantInt::getFalse(OldCond->getType()); - BI->setCondition(NewCond); - if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); - Changed = true; - continue; - } + assert(!ExitCount->isZero() && "Should have been folded above"); PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); if (!IndVar) Index: test/Transforms/IndVarSimplify/eliminate-exit.ll =================================================================== --- test/Transforms/IndVarSimplify/eliminate-exit.ll +++ test/Transforms/IndVarSimplify/eliminate-exit.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -indvars -S < %s | FileCheck %s + +define void @ult(i64 %n, i64 %m) { +; CHECK-LABEL: @ult( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp ult i64 [[N:%.*]], [[M:%.*]] +; CHECK-NEXT: br i1 [[CMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 true, label [[LOOP]], label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp0 = icmp ult i64 %n, %m + br i1 %cmp0, label %loop, label %exit +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %iv.next = add i64 %iv, 1 + %cmp1 = icmp ult i64 %iv, %n + br i1 %cmp1, label %latch, label %exit +latch: + call void @side_effect() + %cmp2 = icmp ult i64 %iv, %m + br i1 %cmp2, label %loop, label %exit +exit: + ret void +} + +define void @ugt(i64 %n, i64 %m) { +; CHECK-LABEL: @ugt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp ugt i64 [[N:%.*]], [[M:%.*]] +; CHECK-NEXT: br i1 [[CMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: br i1 true, label [[LATCH]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IV]], [[M]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp0 = icmp ugt i64 %n, %m + br i1 %cmp0, label %loop, label %exit +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %iv.next = add i64 %iv, 1 + %cmp1 = icmp ult i64 %iv, %n + br i1 %cmp1, label %latch, label %exit +latch: + call void @side_effect() + %cmp2 = icmp ult i64 %iv, %m + br i1 %cmp2, label %loop, label %exit +exit: + ret void +} + +define void @ule(i64 %n, i64 %m) { +; CHECK-LABEL: @ule( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp ule i64 [[N:%.*]], [[M:%.*]] +; CHECK-NEXT: br i1 [[CMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IV]], [[M]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp0 = icmp ule i64 %n, %m + br i1 %cmp0, label %loop, label %exit +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %iv.next = add i64 %iv, 1 + %cmp1 = icmp ult i64 %iv, %n + br i1 %cmp1, label %latch, label %exit +latch: + call void @side_effect() + %cmp2 = icmp ult i64 %iv, %m + br i1 %cmp2, label %loop, label %exit +exit: + ret void +} + +define void @uge(i64 %n, i64 %m) { +; CHECK-LABEL: @uge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp uge i64 [[N:%.*]], [[M:%.*]] +; CHECK-NEXT: br i1 [[CMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IV]], [[M]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp0 = icmp uge i64 %n, %m + br i1 %cmp0, label %loop, label %exit +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %iv.next = add i64 %iv, 1 + %cmp1 = icmp ult i64 %iv, %n + br i1 %cmp1, label %latch, label %exit +latch: + call void @side_effect() + %cmp2 = icmp ult i64 %iv, %m + br i1 %cmp2, label %loop, label %exit +exit: + ret void +} + + +define void @ult_const_max(i64 %n) { +; CHECK-LABEL: @ult_const_max( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp ult i64 [[N:%.*]], 20 +; CHECK-NEXT: br i1 [[CMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: br i1 true, label [[LATCH]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp0 = icmp ult i64 %n, 20 + br i1 %cmp0, label %loop, label %exit +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %iv.next = add i64 %iv, 1 + %udiv = udiv i64 %iv, 10 + %cmp1 = icmp ult i64 %udiv, 2 + br i1 %cmp1, label %latch, label %exit +latch: + call void @side_effect() + %cmp2 = icmp ult i64 %iv, %n + br i1 %cmp2, label %loop, label %exit +exit: + ret void +} + + +declare void @side_effect()