Index: lib/Transforms/Scalar/LoopSimplifyCFG.cpp =================================================================== --- lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -30,38 +30,134 @@ #include "llvm/IR/Dominators.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; #define DEBUG_TYPE "loop-simplifycfg" +static void UpdateDomTreeInfo(BasicBlock *BB, DominatorTree &DT) { + // When BB is about to be removed, move all of the BB's children to be children of + // its IDom. This allows us to remove the domtree entry for BB. + auto *BBDom = DT.getNode(BB); + if (!BBDom) + return; + SmallVector ChildNodes; + ChildNodes.insert(ChildNodes.begin(), BBDom->begin(), BBDom->end()); + for (auto &CN : ChildNodes) + DT.changeImmediateDominator(CN, BBDom->getIDom()); + DT.eraseNode(BB); +} + +// Fold a constant conditional branch into an unconditional +// branch. Return true if we succeed in folding the branch. +static bool TryToFoldConstConditionalBranch(Loop &L, BasicBlock *BB, + LoopInfo &LI, DominatorTree &DT) { + TerminatorInst *T = BB->getTerminator(); + IRBuilder<> Builder(T); + auto *BI = dyn_cast(T); + if (!BI || BI->isUnconditional()) + return false; + ConstantInt *Cond = dyn_cast(BI->getCondition()); + if (!Cond) + return false; + BasicBlock *Dest1 = BI->getSuccessor(0); + BasicBlock *Dest2 = BI->getSuccessor(1); + + BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; + BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; + // Only fold conditions where taken successor is within the loop. This is to + // avoid the possibility of converting a loop to a non-loop, where the only + // exiting block always jumps to the exit block out of the loop. + if (LI.getLoopFor(Destination) != &L) + return false; + + // Let the basic block know that we are letting go of it. Based on this, + // it will adjust its PHI nodes. + OldDest->removePredecessor(BB); + + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Destination); + BI->eraseFromParent(); + + // Consider the case: + // block: + // br true, label Dest, label OldDest + // OldDest: ; predecessors: block + // When the branch is made an unconditional one: + // block: + // br label Dest + // OldDest: ; no predecessors! + // NB: OldDest predecessor list is not updated through `removePredecessor` + // above! It still has the `block` as it's predecessor. However, changing the + // branch to an unconditional one, makes this OldDest (and its successors) + // unreachable in LoopInfo, where unreachable is defined as having no predecessors. + // So, we need to update the relevant data structures. + // NB: This will never delete any loops completely. It will only delete + // blocks within loops. + if (pred_begin(OldDest) == pred_end(OldDest)) { + SmallPtrSet WorkList; + WorkList.insert(OldDest); + // Delete OldDest and dead successor blocks. + while (!WorkList.empty()) { + auto *BB = *WorkList.begin(); + WorkList.erase(BB); + SmallVector Successors(succ_begin(BB), succ_end(BB)); + UpdateDomTreeInfo(BB, DT); + // Update LoopInfo to remove the block from all loops where it is + // present. + LI.removeBlock(BB); + // Delete the block as well, so that these unreachable blocks do not + // affect the loop's LCSSA form. + DeleteDeadBlock(BB); + for (auto &SuccBB : Successors) + if (pred_begin(SuccBB) == pred_end(SuccBB)) { + WorkList.insert(SuccBB); + } + } + } + return true; +} + static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) { - bool Changed = false; + bool Changed = false, ChangedConstantFolding = false;; // Copy blocks into a temporary array to avoid iterator invalidation issues // as we remove them. SmallVector Blocks(L.blocks()); for (auto &Block : Blocks) { - // Attempt to merge blocks in the trivial case. Don't modify blocks which - // belong to other loops. - BasicBlock *Succ = cast_or_null(Block); - if (!Succ) + BasicBlock *BB = cast_or_null(Block); + if (!BB) continue; - BasicBlock *Pred = Succ->getSinglePredecessor(); - if (!Pred || !Pred->getSingleSuccessor() || LI.getLoopFor(Pred) != &L) - continue; + if (TryToFoldConstConditionalBranch(L, BB, LI, DT)) + ChangedConstantFolding = true; + auto *Succ = BB->getSingleSuccessor(); + if (!Succ || !Succ->getSinglePredecessor() || LI.getLoopFor(Succ) != &L) + continue; // Pred is going to disappear, so we need to update the loop info. - if (L.getHeader() == Pred) + if (L.getHeader() == BB) L.moveToHeader(Succ); - LI.removeBlock(Pred); - MergeBasicBlockIntoOnlyPred(Succ, &DT); + LI.removeBlock(BB); + // BB's dom node may already be erased if we passed through the constant + // folding path. If not, update it here. + UpdateDomTreeInfo(BB, DT); + MergeBasicBlockIntoOnlyPred(Succ); Changed = true; } - return Changed; + auto *F = L.getHeader()->getParent(); + // When we change constant branches to unconditional, we effectively make some + // blocks dead (i.e. physically remove their predecessors). When the DT is + // recomputed for verification, the dom nodes are not created for these dead + // blocks. However, for the in-place DT fix in UpdateDomTreeInfo, the dead + // blocks are marked as dominated by the entry block. TODO: Fix the dominator + // tree in place. + if (ChangedConstantFolding) + DT.recalculate(*F); + return Changed|ChangedConstantFolding; } PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, Index: test/Transforms/LoopSimplifyCFG/merge-constant-conditional-branch.ll =================================================================== --- /dev/null +++ test/Transforms/LoopSimplifyCFG/merge-constant-conditional-branch.ll @@ -0,0 +1,232 @@ +; RUN: opt -S -loop-simplifycfg < %s | FileCheck %s +; RUN: opt -S -loop-simplifycfg -verify-dom-info < %s +; RUN: opt -S -loop-simplifycfg -licm < %s + + +; dont modify self loops, which can convert a loop to a non-loop. +define i32 @test1(i32 %arg) { +; CHECK-LABEL: test1 +; CHECK: br i1 false, label %loop, label %exit +entry: + br label %loop + +loop: + %x = phi i32 [ %arg, %entry ], [ %x.next, %loop ] + %x.next = add i32 %x, 1 + br i1 false, label %loop, label %exit + +exit: + ret i32 %x.next +} + + + +; fold constant conditional branch +; loop has more than one backedge +define i32 @test2(i32 %arg, i32 %n) { +; CHECK-LABEL: test2 +; CHECK-NOT: br i1 true +; CHECK-LABEL: latch: +; CHECK-NEXT: %x = phi i32 [ %arg, %entry ], [ %x.next, %header.backedge ] +; CHECK-NEXT: %x2.next = add i32 %x, 2 +; CHECK-NEXT: %cond = icmp eq i32 %x, %n +; CHECK-NEXT: %x.next = add i32 %x, 1 +; CHECK-NEXT: br i1 %cond, label %header.backedge, label %exit +entry: + br label %header + +header: + %x = phi i32 [ %arg, %entry ], [ %x.next, %latch ], [%x2.next, %header] + %x2.next = add i32 %x, 2 + br i1 true, label %latch, label %header + +latch: + %cond = icmp eq i32 %x, %n + %x.next = add i32 %x, 1 + br i1 %cond, label %header, label %exit + +exit: + ret i32 %x +} + +; fold both conditional branches +define i32 @test3(i32 %x, i32 %y, i32 %n) { +; CHECK-LABEL: @test3 +; CHECK-NOT: br i1 false +; CHECK-NOT: br i1 true +; CHECK-LABEL: done: +; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ] +; CHECK: %cond = icmp slt i32 %i.next, %n +; CHECK-NEXT: br i1 %cond, label %done, label %exit +entry: + br label %header + +header: + %i = phi i32 [ 0, %entry ], [ %i.next, %done ] + %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ] + %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ] + br i1 false, label %div-by-zero, label %non-zero + +non-zero: + br i1 true, label %done, label %special_case + +special_case: + %div = sdiv i32 %x.next, %y.next + br label %done + +done: + %result = phi i32 [ %div, %special_case ], [ 1234, %non-zero ] + %i.next = add i32 %i, %result + %x.i = add i32 %x.next, %i + %y.i = add i32 %y.next, %i + %cond = icmp slt i32 %i.next, %n + br i1 %cond, label %header, label %exit + +exit: + ret i32 %x.i + +div-by-zero: + ret i32 1234 +} + +; We fold both conditions, and delete the div-by-zero block. +; second_loop block is still present in IR, will be deleted with a +; function/module pass. We do not delete/remove loops during loop-simplifycfg +define i32 @test4(i32 %x, i32 %y, i32 %n) { +; CHECK-LABEL: test4 +; CHECK-LABEL: done: +; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ] +; CHECK-NEXT: %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ] +; CHECK: %cond = icmp slt i32 %i.next, %n +; CHECK-NEXT: br i1 %cond, label %done, label %exit.loopexit1 +; CHECK-NOT: non-zero +; CHECK-NOT: special_case +; CHECK-NOT: div-by-zero +; CHECK-LABEL: second_loop: +entry: + br label %header + +header: + %i = phi i32 [ 0, %entry ], [ %i.next, %done ] + %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ] + %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ] + br i1 false, label %div-by-zero, label %non-zero + +non-zero: + br i1 true, label %done, label %special_case + +special_case: + %div = sdiv i32 %x.next, %y.next + br label %done + +done: + %result = phi i32 [ %div, %special_case ], [ 1234, %non-zero ] + %i.next = add i32 %i, %result + %x.i = add i32 %x.next, %i + %y.i = add i32 %y.next, %i + %cond = icmp slt i32 %i.next, %n + br i1 %cond, label %header, label %exit + +exit: + ret i32 %x.next + +div-by-zero: + %nsquare = mul i32 %n, %n + br label %second_loop + +second_loop: + %x2 = phi i32 [ %x.next, %div-by-zero], [%x2.next, %second_loop] + %x2.next = add i32 %x2, 1 + %cond2 = icmp slt i32 %x2.next, %nsquare + br i1 %cond2, label %second_loop, label %exit +} + +; when the not-taken target contains more than one predecessor +define i32 @test5(i32 %x, i32 %y, i32 %n) { +; CHECK-LABEL: test5 +; CHECK-NOT: br i1 false +; CHECK-NOT: br i1 true +; CHECK-NOT: special_case +; CHECK-LABEL: done: +; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ] +; CHECK: %cond = icmp slt i32 %i.next, %n +; CHECK: br i1 %cond, label %done, label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: %x.next.lcssa = phi i32 [ %x.next, %done ] +entry: + br label %header + +header: + %i = phi i32 [ 0, %entry ], [ %i.next, %done ] + %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ] + %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ] + br i1 false, label %special_case, label %non-zero + +non-zero: + br i1 true, label %done, label %special_case + +special_case: + %div = sdiv i32 %x.next, %y.next + br label %done + +done: + %result = phi i32 [ %div, %special_case ], [ 1234, %non-zero ] + %i.next = add i32 %i, %result + %x.i = add i32 %x.next, %i + %y.i = add i32 %y.next, %i + %cond = icmp slt i32 %i.next, %n + br i1 %cond, label %header, label %exit + +exit: + ret i32 %x.next +} + +; delete multiple successor blocks because they became dead. +define i32 @test6(i32 %x, i32 %y, i32 %n) { +; CHECK-LABEL: test6 +; CHECK-NOT: br i1 false +; CHECK-NOT: br i1 true +; CHECK-NOT: special_case +; CHECK-NOT: interm: +; CHECK-LABEL: interm2: +; CHECK-NEXT: br label %interm2 +; CHECK-LABEL: done: +; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ] +; CHECK: %cond = icmp slt i32 %i.next, %n +; CHECK: br i1 %cond, label %done, label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: %x.next.lcssa2 = phi i32 [ %x.next, %done ] +entry: + br label %header + +header: + %i = phi i32 [ 0, %entry ], [ %i.next, %done ] + %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ] + %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ] + br i1 false, label %special_case, label %non-zero + +non-zero: + br i1 true, label %done, label %special_case + +special_case: + %div = sdiv i32 %x.next, %y.next + br label %interm1 + +interm1: + br label %interm2 + +; dont delete loop interm2, even though it's unreachable. +interm2: + br label %interm2 + +done: + %result = phi i32 [ 1234, %non-zero ] + %i.next = add i32 %i, %result + %x.i = add i32 %x.next, %i + %y.i = add i32 %y.next, %i + %cond = icmp slt i32 %i.next, %n + br i1 %cond, label %header, label %exit + +exit: + ret i32 %x.next +}