Index: lib/Transforms/Scalar/LoopDeletion.cpp =================================================================== --- lib/Transforms/Scalar/LoopDeletion.cpp +++ lib/Transforms/Scalar/LoopDeletion.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -29,6 +30,22 @@ STATISTIC(NumDeleted, "Number of loops deleted"); +/// This function deletes dead loops. The caller of this function needs to +/// guarantee that the loop is infact dead. Here we handle two kinds of dead +/// loop. The first kind is where no backedges exist, and the loop variant +/// statements are made invariant by moving to the preheader. The second kind +/// is where the loop is no longer a loop in LLVM notion, since the loop is +/// semantically unreachable (see `IsLoopNeverExecuted`). We can be more +/// aggressive in removing the semantically unreachable loops since they will +/// not cause any difference to program behaviour (observable or unobservable). +/// +/// This also updates the relevant analysis information in \p DT, \p SE, and \p +/// LI. It also updates the loop PM if an updater struct is provided. +// TODO: This function will be used by loop-simplifyCFG as well. So, move this +// to LoopUtils.cpp +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater = nullptr); /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -84,12 +101,46 @@ return true; } +/// In LLVM, a loop can never be unreachable i.e. it's header would always have a +/// path from the entry block. However, in cases where the loop header would +/// never be executed at runtime, we consider such loops as semantically +/// unreachable. This function returns true if there is no viable path from the +/// entry block to the header of \p L. Right now, it only does a local search to +/// save compile time. +static bool isLoopNeverExecuted(Loop *L) { + using namespace PatternMatch; + + auto *Preheader = L->getLoopPreheader(); + // TODO: We can relax this constraint, since we just need a loop + // predecessor. + assert(Preheader && "Needs preheader!"); + + if (Preheader == &Preheader->getParent()->getEntryBlock()) + return false; + // All predecessors of the preheader should have a constant conditional + // branch, with the loop's preheader as not-taken. + for (auto *Pred: predecessors(Preheader)) { + BasicBlock *Taken, *NotTaken; + ConstantInt *Cond; + if (!match(Pred->getTerminator(), + m_Br(m_ConstantInt(Cond), Taken, NotTaken))) + return false; + if (!Cond->getZExtValue()) + std::swap(Taken, NotTaken); + if (Taken == Preheader) + return false; + } + // Either the preheader has no predecessors or all the predecessors have the + // loop preheader as not-taken target. + return true; +} + /// Remove a loop if it is dead. /// /// A loop is considered dead if it does not impact the observable behavior of /// the program other than finite running time. This never removes a loop that -/// might be infinite, as doing so could change the halting/non-halting nature -/// of a program. +/// might be infinite (unless it is semantically unreachable), as doing so +/// could change the halting/non-halting nature of a program. /// /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in /// order to make various safety checks work. @@ -97,28 +148,35 @@ /// \returns true if any changes were made. This may mutate the loop even if it /// is unable to delete it due to hoisting trivially loop invariant /// instructions out of the loop. -/// -/// This also updates the relevant analysis information in \p DT, \p SE, and \p -/// LI. It also updates the loop PM if an updater struct is provided. static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - // We can only remove the loop if there is a preheader that we can - // branch from after removing it. - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) + + // We can't remove loops that contain subloops. If the subloops were dead, + // they would already have been removed in earlier executions of this pass. + if (L->begin() != L->end()) return false; // If LoopSimplify form is not available, stay out of trouble. if (!L->hasDedicatedExits()) return false; - // We can't remove loops that contain subloops. If the subloops were dead, - // they would already have been removed in earlier executions of this pass. - if (L->begin() != L->end()) + // We can only remove the loop if there is a preheader that we can + // branch from after removing it. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return false; + BasicBlock *ExitBlock = L->getUniqueExitBlock(); + if (ExitBlock && isLoopNeverExecuted(L)) { + deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + return true; + } + + // The remaining checks below are for a loop being dead because all statements + // in the loop are invariant. SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -126,7 +184,6 @@ // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (!ExitBlock) return false; @@ -141,6 +198,19 @@ if (isa(S)) return Changed; + deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + + return true; +} + +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + auto *Preheader = L->getLoopPreheader(); + assert(Preheader && "Preheader should exist!"); + // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. // @@ -156,17 +226,29 @@ // to determine what it needs to clean up. SE.forgetLoop(L); + auto *ExitBlock = L->getUniqueExitBlock(); + assert(ExitBlock && "Should have a unique exit block!"); + // Connect the preheader directly to the exit block. - TerminatorInst *TI = Preheader->getTerminator(); - TI->replaceUsesOfWith(L->getHeader(), ExitBlock); + // Even when the loop is never executed, we cannot remove the edge from the + // source block to the exit block. Consider the case where the unexecuted loop + // branches back to an outer loop. If we deleted the loop and removed the edge + // coming to this inner loop, this will break the outer loop structure (by + // deleting the backedge of the outer loop). If the outer loop is indeed a + // non-loop, it will be deleted in a future iteration of loop deletion pass. + Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock); - // Rewrite phis in the exit block to get their inputs from - // the preheader instead of the exiting block. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. BasicBlock *ExitingBlock = ExitingBlocks[0]; BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast(BI)) { int j = P->getBasicBlockIndex(ExitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); + if (LoopIsNeverExecuted) + P->setIncomingValue(j, UndefValue::get(P->getType())); P->setIncomingBlock(j, Preheader); for (unsigned i = 1; i < ExitingBlocks.size(); ++i) P->removeIncomingValue(ExitingBlocks[i]); @@ -211,9 +293,6 @@ // The last step is to update LoopInfo now that we've eliminated this loop. LI.markAsRemoved(L); - ++NumDeleted; - - return true; } PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, @@ -254,7 +333,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { if (skipLoop(L)) return false; - +errs() << "Loop Deletion for function: " << *L->getHeader()->getParent() << "\n"; DominatorTree &DT = getAnalysis().getDomTree(); ScalarEvolution &SE = getAnalysis().getSE(); LoopInfo &LI = getAnalysis().getLoopInfo(); Index: test/Transforms/LoopDeletion/unreachable-loops.ll =================================================================== --- /dev/null +++ test/Transforms/LoopDeletion/unreachable-loops.ll @@ -0,0 +1,336 @@ +; RUN: opt < %s -loop-deletion -S | FileCheck %s +; RUN: opt < %s -loop-deletion -verify-dom-info -S + +; Checking that we can delete loops that are never executed. +; We do not change the constant conditional branch statement (where the not-taken target +; is the loop) to an unconditional one. + +; delete the infinite loop because it is never executed. +define void @test1(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test1 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-NOT: bb: +entry: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; FIXME: We can delete this infinite loop. Currently we do not, +; because the infinite loop has no exit block. +define void @test2(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test2 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-LABEL: bb: +; CHECK: br label %bb +entry: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br label %bb + +return: + ret void +} + +; There are multiple exiting blocks and a single exit block. +; Since it is a never executed loop, we do not care about the values +; from different exiting paths and we can +; delete the loop. +define i64 @test3(i64 %n, i64 %m, i64 %maybe_zero) nounwind { + +; CHECK-NOT: bb: +; CHECK-NOT: bb2: +; CHECK-NOT: bb3: +; CHECK-LABEL: return.loopexit: +; CHECK-NEXT: %x.lcssa.ph = phi i64 [ undef, %bb.preheader ] +; CHECK-NEXT: br label %return +; CHECK-LABEL: return: +; CHECK-NEXT: %x.lcssa = phi i64 [ 20, %entry ], [ %x.lcssa.ph, %return.loopexit ] +; CHECK-NEXT: ret i64 %x.lcssa +entry: + br i1 false, label %bb, label %return + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb3 ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + br i1 %t1, label %bb2, label %return + +bb2: + %t2 = icmp slt i64 %x.0, %m + %unused1 = udiv i64 42, %maybe_zero + br i1 %t2, label %bb3, label %return + +bb3: + %t3 = icmp slt i64 %x.0, %m + %unused2 = sdiv i64 42, %maybe_zero + br i1 %t3, label %bb, label %return + +return: +; the only valid value fo x.lcssa is 20. + %x.lcssa = phi i64 [ 12, %bb ], [ 14, %bb2 ], [ 16, %bb3 ], [20, %entry ] + ret i64 %x.lcssa +} + +; Cannot delete the loop, since it may be executed at runtime. +define void @test4(i64 %n, i64 %m, i1 %cond) { +; CHECK-LABEL: test4 +; CHECK-LABEL: bb: +entry: + br i1 %cond, label %looppred1, label %looppred2 + +looppred1: + br i1 true, label %return, label %bb + +looppred2: + br i1 false, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; multiple constant conditional branches with loop not-taken in all cases. +define void @test5(i64 %n, i64 %m, i1 %cond) nounwind { +; CHECK-LABEL: test5 +; CHECK-LABEL: looppred1: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-LABEL: looppred2: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-NOT: bb: +entry: + br i1 %cond, label %looppred1, label %looppred2 + +looppred1: + br i1 true, label %return, label %bb + +looppred2: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; Don't delete this infinite loop because the loop +; is executable at runtime. +define void @test6(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test6 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %bb.preheader, label %bb.preheader +; CHECK: bb: +entry: + br i1 true, label %bb, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +declare i64 @foo(i64) +; The loop L2 is never executed and is a subloop, with an +; exit block that branches back to parent loop. +; Here we can delete loop L2, while L1 still exists. +define i64 @test7(i64 %n) { +; CHECK-LABEL: test7 +; CHECK-LABEL: L1: +; CHECK: br i1 true, label %L1Latch, label %L2.preheader +; CHECK-LABEL: L2.preheader: +; CHECK-NEXT: br label %L1Latch.loopexit +; CHECK-LABEL: L1Latch.loopexit: +; CHECK: br label %L1Latch +; CHECK-LABEL: L1Latch: +; CHECK-NEXT: %y = phi i64 [ %y.next, %L1 ], [ %y.L2.lcssa, %L1Latch.loopexit ] +; CHECK: br i1 %cond2, label %exit, label %L1 +entry: + br label %L1 + +L1: + %y.next = phi i64 [ 0, %entry ], [ %y.add, %L1Latch ] + br i1 true, label %L1Latch, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L1Latch + +L1Latch: + %y = phi i64 [ %y.next, %L1 ], [ %y.L2, %L2 ] + %y.add = add i64 %y, %n + %cond2 = icmp eq i64 %y.add, 42 + br i1 %cond2, label %exit, label %L1 + +exit: + ret i64 %y.add +} + + +; Show recursive deletion of loops. Since we start with subloops and progress outward +; to parent loop, we first delete the loop L2. Now loop L1 becomes a non-loop since it's backedge +; from L2's preheader to L1's exit block is never taken. So, L1 gets deleted as well. +define void @test8(i64 %n) { +; CHECK-LABEL: test8 +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: ret void +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L1 + +exit: + ret void +} + + +; Delete a loop (L2) which has subloop (L3). +; Here we delete loop L2, but leave L3 as is. +; FIXME: Can delete L3 as well, by iteratively going backward through the single +; predecessor of L3 until we reach L1's block that makes L3 unreachable. +define void @test9(i64 %n) { +; CHECK-LABEL: test9 +; CHECK-LABEL: L2.preheader: +; CHECK-NEXT: br label %L3.preheader +; CHECK-NOT: L2: +; CHECK-LABEL: L3.preheader: +; CHECK-NEXT: %y.L2.lcssa = phi i64 [ undef, %L2.preheader ] +; CHECK-NEXT: br label %L3 +; CHECK-LABEL: L3: +; CHECK: br i1 %cond2, label %L3, label %L1.loopexit +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L3 + +L3: + %cond2 = icmp slt i64 %y.L2, %n + br i1 %cond2, label %L3, label %L1 + +exit: + ret void +} + +; We cannot delete L3 because of call within it. +; Since L3 is not deleted, and entirely contained within L2, L2 is also not +; deleted. +; FIXME: We can delete unexecutable loops having +; subloops contained entirely within them. +define void @test10(i64 %n) { +; CHECK-LABEL: test10 +; CHECK: L2: +; CHECK: L3: +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L1, label %L3 + +L3: + %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ] + %y.L3.next = add i64 %y.L3, 1 + %dummy = call i64 @foo(i64 %y.L3.next) + %cond2 = icmp slt i64 %y.L3, %n + br i1 %cond2, label %L3, label %L2 + +exit: + ret void +} + +; same as test10, but L3 does not contain call. +; So, in the first iteration, all statements of L3 are made invariant, and L3 is +; deleted. +; In the next iteration, since L2 is unreachable and has no subloops, we delete +; L2 as well. Finally, the outermost loop L1 is deleted. +define void @test11(i64 %n) { +; CHECK-LABEL: test11 +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: ret void +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L1, label %L3 + +L3: + %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ] + %y.L3.next = add i64 %y.L3, 1 + %cond2 = icmp slt i64 %y.L3, %n + br i1 %cond2, label %L3, label %L2 + +exit: + ret void +} +