Index: lib/Transforms/Scalar/LoopDeletion.cpp =================================================================== --- lib/Transforms/Scalar/LoopDeletion.cpp +++ lib/Transforms/Scalar/LoopDeletion.cpp @@ -29,6 +29,23 @@ STATISTIC(NumDeleted, "Number of loops deleted"); +/// This function deletes dead loops. The caller of this function needs to +/// guarantee that the loop is infact dead. +/// Here we handle two kinds of dead loop. The first kind is where no backedges exist, and +/// the loop variant statements are made invariant by moving to the preheader. +/// The second kind is where the loop is no longer a loop in LLVM notion, since +/// the loop is semantically unreachable (see `IsLoopNeverExecuted`). +/// We can be more aggressive in removing the semantically unreachable loops +/// since they will not cause any difference to program behaviour (observable or +/// unobservable). +/// +/// This also updates the relevant analysis information in \p DT, \p SE, and \p +/// LI. It also updates the loop PM if an updater struct is provided. +// TODO: This function will be used by loop-simplifyCFG as well. So, move this +// to LoopUtils.cpp +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater = nullptr); /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -84,12 +101,55 @@ return true; } +/// In LLVM, a loop can never be unreachable i.e. it's header would +/// always have atleast one predecessor. However, in cases where the loop +/// header would never be executed at runtime, we consider such loops as +/// semantically unreachable. This function returns true when loop `L` is +/// semantically unreachable. For example, loop L2 is unreachable in +/// the following IR: +/// L1: ... +/// br i1 true, label L1, label L2 +/// L2: ... ; predecessors: L1, B3 +/// br label B3 +/// B3: ... ; predecessors: L2 +/// br label L2 +static bool isLoopNeverExecuted(Loop *L) { + + auto *Pred = L->getLoopPredecessor(); + auto *FirstLoopBlock = L->getHeader(); + + // Pred can be the preheader that branches unconditionally to the loop header. + // We want the block that has a constant conditional branch, with the loop's + // first block as not-taken. + if (Pred && Pred->getTerminator()->getNumSuccessors() == 1) { + FirstLoopBlock = Pred; + Pred = Pred->getSinglePredecessor(); + } + + if (!Pred) + return false; + // Note: We could keep iterating backwards over the singlePredecessors + // (straight line code) until we reach a block that satisfies the constraint + // of constant conditional branch. + auto *BI = dyn_cast(Pred->getTerminator()); + if (!BI || BI->isUnconditional()) + return false; + ConstantInt *Cond = dyn_cast(BI->getCondition()); + if (!Cond) + return false; + BasicBlock *NotTaken = + Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0); + BasicBlock *Taken = + Cond->getZExtValue() ? BI->getSuccessor(0) : BI->getSuccessor(1); + return NotTaken == FirstLoopBlock && Taken != FirstLoopBlock; +} + /// Remove a loop if it is dead. /// /// A loop is considered dead if it does not impact the observable behavior of /// the program other than finite running time. This never removes a loop that -/// might be infinite, as doing so could change the halting/non-halting nature -/// of a program. +/// might be infinite (unless it is semantically unreachable), as doing so +/// could change the halting/non-halting nature of a program. /// /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in /// order to make various safety checks work. @@ -97,28 +157,42 @@ /// \returns true if any changes were made. This may mutate the loop even if it /// is unable to delete it due to hoisting trivially loop invariant /// instructions out of the loop. -/// -/// This also updates the relevant analysis information in \p DT, \p SE, and \p -/// LI. It also updates the loop PM if an updater struct is provided. static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - // We can only remove the loop if there is a preheader that we can - // branch from after removing it. - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) + + // We can't remove loops that contain subloops. If the subloops were dead, + // they would already have been removed in earlier executions of this pass. + if (L->begin() != L->end()) return false; // If LoopSimplify form is not available, stay out of trouble. if (!L->hasDedicatedExits()) return false; - // We can't remove loops that contain subloops. If the subloops were dead, - // they would already have been removed in earlier executions of this pass. - if (L->begin() != L->end()) + BasicBlock *ExitBlock = L->getUniqueExitBlock(); + + // When the loop is never executed, we do not care if it is an + // infinite loop, has multiple exits, or subloops (entirely contained in this + // loop). For now, we handle possibly infinite loops and loops with single exit. + if (ExitBlock && isLoopNeverExecuted(L)) { + deleteDeadLoop(L, DT, SE, LI, + /*LoopIsNeverExecuted*/ true, Updater); + ++NumDeleted; + return true; + } + + // The remaining checks below are for a loop being dead because it has no + // backedge, and all statements in the loop are invariant. This is a non-loop + // if we can move all statements within the loop to the preheader. + // We can only remove the loop if there is a preheader that we can + // branch from after removing it. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return false; + SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -126,7 +200,6 @@ // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (!ExitBlock) return false; @@ -141,6 +214,28 @@ if (isa(S)) return Changed; + deleteDeadLoop(L, DT, SE, LI, false /*LoopIsNeverExecuted*/, Updater); + ++NumDeleted; + + return true; +} + +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + auto *Preheader = L->getLoopPreheader(); + assert((Preheader || LoopIsNeverExecuted) && + "Preheader should exist if loop is reachable!"); + BasicBlock *BlockCausingLoopUnreachable = nullptr; + // We already proved that the loop is never executed. Get the block that makes + // the loop unreachable. If the preheader does not exist, this will be the + // source block which will be directly connected to the exit block of the + // loop. + if (LoopIsNeverExecuted) + BlockCausingLoopUnreachable = + Preheader ? Preheader->getSinglePredecessor() : L->getLoopPredecessor(); + // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. // @@ -156,18 +251,35 @@ // to determine what it needs to clean up. SE.forgetLoop(L); - // Connect the preheader directly to the exit block. - TerminatorInst *TI = Preheader->getTerminator(); - TI->replaceUsesOfWith(L->getHeader(), ExitBlock); + auto *ExitBlock = L->getUniqueExitBlock(); + assert(ExitBlock && "Should have a unique exit block!"); + // All the branch changes, phi node updates at the exit block and dom tree + // updates should be done with this node as the source. + auto *SourceBB = Preheader ? Preheader : BlockCausingLoopUnreachable; + assert(SourceBB && "the source basic block into the loop should exist!"); + + // Connect the source block that exists outside the loop, directly to + // the exit block. + // Even when the loop is never executed, we cannot remove the edge from the + // source block to the exit block. Consider the case where the unexecuted loop + // branches back to an outer loop. If we deleted the loop and removed the edge + // coming to this inner loop, this will break the outer loop structure (by + // deleting the backedge of the outer loop). If the outer loop is indeed a + // non-loop, it will be deleted in a future iteration of loop deletion pass. + SourceBB->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock); - // Rewrite phis in the exit block to get their inputs from - // the preheader instead of the exiting block. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + // Rewrite phis in the exit block to get their inputs from the SourceBB + // instead of the exiting block. BasicBlock *ExitingBlock = ExitingBlocks[0]; BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast(BI)) { int j = P->getBasicBlockIndex(ExitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); - P->setIncomingBlock(j, Preheader); + if (LoopIsNeverExecuted) + P->setIncomingValue(j, UndefValue::get(P->getType())); + P->setIncomingBlock(j, SourceBB); for (unsigned i = 1; i < ExitingBlocks.size(); ++i) P->removeIncomingValue(ExitingBlocks[i]); ++BI; @@ -178,11 +290,11 @@ SmallVector ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { - // Move all of the block's children to be children of the Preheader, which + // Move all of the block's children to be children of the SourceBB, which // allows us to remove the domtree entry for the block. ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end()); for (DomTreeNode *ChildNode : ChildNodes) { - DT.changeImmediateDominator(ChildNode, DT[Preheader]); + DT.changeImmediateDominator(ChildNode, DT[SourceBB]); } ChildNodes.clear(); @@ -211,9 +323,6 @@ // The last step is to update LoopInfo now that we've eliminated this loop. LI.markAsRemoved(L); - ++NumDeleted; - - return true; } PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, Index: test/Transforms/LoopDeletion/unreachable-loops.ll =================================================================== --- /dev/null +++ test/Transforms/LoopDeletion/unreachable-loops.ll @@ -0,0 +1,333 @@ +; RUN: opt < %s -loop-deletion -S | FileCheck %s +; RUN: opt < %s -loop-deletion -verify-dom-info -S + +; Checking that we can delete loops that are never executed. +; We do not change the constant conditional branch statement (where the not-taken target +; is the loop) to an unconditional one. + +; delete the infinite loop because it is never executed. +define void @test1(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test1 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-NOT: bb: +entry: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; FIXME: We can delete this infinite loop. Currently we do not, +; because the infinite loop has no exit block. +define void @test2(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test2 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-LABEL: bb: +; CHECK: br label %bb +entry: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br label %bb + +return: + ret void +} + +; There are multiple exiting blocks and a single exit block. +; Since it is a never executed loop, we do not care about the values +; from different exiting paths and we can +; delete the loop. +define i64 @test3(i64 %n, i64 %m, i64 %maybe_zero) nounwind { + +; CHECK-NOT: bb: +; CHECK-NOT: bb2: +; CHECK-NOT: bb3: +; CHECK-LABEL: return.loopexit: +; CHECK-NEXT: %x.lcssa.ph = phi i64 [ undef, %bb.preheader ] +; CHECK-NEXT: br label %return +; CHECK-LABEL: return: +; CHECK-NEXT: %x.lcssa = phi i64 [ 20, %entry ], [ %x.lcssa.ph, %return.loopexit ] +; CHECK-NEXT: ret i64 %x.lcssa +entry: + br i1 false, label %bb, label %return + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb3 ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + br i1 %t1, label %bb2, label %return + +bb2: + %t2 = icmp slt i64 %x.0, %m + %unused1 = udiv i64 42, %maybe_zero + br i1 %t2, label %bb3, label %return + +bb3: + %t3 = icmp slt i64 %x.0, %m + %unused2 = sdiv i64 42, %maybe_zero + br i1 %t3, label %bb, label %return + +return: +; the only valid value fo x.lcssa is 20. + %x.lcssa = phi i64 [ 12, %bb ], [ 14, %bb2 ], [ 16, %bb3 ], [20, %entry ] + ret i64 %x.lcssa +} + +; Cannot delete the loop, since it may be executed at runtime. +define void @test4(i64 %n, i64 %m, i1 %cond) { +; CHECK-LABEL: test4 +; CHECK-LABEL: bb: +entry: + br i1 %cond, label %looppred1, label %looppred2 + +looppred1: + br i1 true, label %return, label %bb + +looppred2: + br i1 false, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; FIXME: multiple constant conditional branches with loop not-taken in test1. +; Currently, we do not delete the loop. +define void @test5(i64 %n, i64 %m, i1 %cond) nounwind { +; CHECK-LABEL: test5 +; CHECK-LABEL: bb: +entry: + br i1 %cond, label %looppred1, label %looppred2 + +looppred1: + br i1 true, label %return, label %bb + +looppred2: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; Don't delete this infinite loop because the loop +; is executable at runtime. +define void @test6(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test6 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %bb.preheader, label %bb.preheader +; CHECK: bb: +entry: + br i1 true, label %bb, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +declare i64 @foo(i64) +; The loop L2 is never executed and is a subloop, with an +; exit block that branches back to parent loop. +; Here we can delete loop L2, while L1 still exists. +define i64 @test7(i64 %n) { +; CHECK-LABEL: test7 +; CHECK-LABEL: L1: +; CHECK: br i1 true, label %L1Latch, label %L2.preheader +; CHECK-LABEL: L2.preheader: +; CHECK-NEXT: br label %L1Latch.loopexit +; CHECK-LABEL: L1Latch.loopexit: +; CHECK: br label %L1Latch +; CHECK-LABEL: L1Latch: +; CHECK-NEXT: %y = phi i64 [ %y.next, %L1 ], [ %y.L2.lcssa, %L1Latch.loopexit ] +; CHECK: br i1 %cond2, label %exit, label %L1 +entry: + br label %L1 + +L1: + %y.next = phi i64 [ 0, %entry ], [ %y.add, %L1Latch ] + br i1 true, label %L1Latch, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L1Latch + +L1Latch: + %y = phi i64 [ %y.next, %L1 ], [ %y.L2, %L2 ] + %y.add = add i64 %y, %n + %cond2 = icmp eq i64 %y.add, 42 + br i1 %cond2, label %exit, label %L1 + +exit: + ret i64 %y.add +} + + +; Show recursive deletion of loops. Since we start with subloops and progress outward +; to parent loop, we first delete the loop L2. Now loop L1 becomes a non-loop since it's backedge +; from L2's preheader to L1's exit block is never taken. So, L1 gets deleted as well. +define void @test8(i64 %n) { +; CHECK-LABEL: test8 +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: ret void +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L1 + +exit: + ret void +} + + +; Delete a loop (L2) which has subloop (L3). +; Here we delete loop L2, but leave L3 as is. +; FIXME: Can delete L3 as well, by iteratively going backward through the single +; predecessor of L3 until we reach L1's block that makes L3 unreachable. +define void @test9(i64 %n) { +; CHECK-LABEL: test9 +; CHECK-LABEL: L2.preheader: +; CHECK-NEXT: br label %L3.preheader +; CHECK-NOT: L2: +; CHECK-LABEL: L3.preheader: +; CHECK-NEXT: %y.L2.lcssa = phi i64 [ undef, %L2.preheader ] +; CHECK-NEXT: br label %L3 +; CHECK-LABEL: L3: +; CHECK: br i1 %cond2, label %L3, label %L1.loopexit +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L3 + +L3: + %cond2 = icmp slt i64 %y.L2, %n + br i1 %cond2, label %L3, label %L1 + +exit: + ret void +} + +; We cannot delete L3 because of call within it. +; Since L3 is not deleted, and entirely contained within L2, L2 is also not +; deleted. +; FIXME: We can delete unexecutable loops having +; subloops contained entirely within them. +define void @test10(i64 %n) { +; CHECK-LABEL: test10 +; CHECK: L2: +; CHECK: L3: +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L1, label %L3 + +L3: + %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ] + %y.L3.next = add i64 %y.L3, 1 + %dummy = call i64 @foo(i64 %y.L3.next) + %cond2 = icmp slt i64 %y.L3, %n + br i1 %cond2, label %L3, label %L2 + +exit: + ret void +} + +; same as test10, but L3 does not contain call. +; So, in the first iteration, all statements of L3 are made invariant, and L3 is +; deleted. +; In the next iteration, since L2 is unreachable and has no subloops, we delete +; L2 as well. Finally, the outermost loop L1 is deleted. +define void @test11(i64 %n) { +; CHECK-LABEL: test11 +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: ret void +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L1, label %L3 + +L3: + %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ] + %y.L3.next = add i64 %y.L3, 1 + %cond2 = icmp slt i64 %y.L3, %n + br i1 %cond2, label %L3, label %L2 + +exit: + ret void +} +