Index: lib/Transforms/Scalar/LoopSimplifyCFG.cpp =================================================================== --- lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -84,6 +84,7 @@ Loop &L; LoopInfo &LI; DominatorTree &DT; + LoopBlocksDFS DFS; // Whether or not the current loop will still exist after terminator constant // folding will be done. In theory, there are two ways how it can happen: @@ -141,7 +142,6 @@ /// Fill all information about status of blocks and exits of the current loop /// if constant folding of all branches will be done. void analyze() { - LoopBlocksDFS DFS(&L); DFS.perform(&LI); assert(DFS.isComplete() && "DFS is expected to be finished"); @@ -386,9 +386,66 @@ } } + bool fixupCurrentLoop() { + // If there is no blocks that are no longer in current loop, nothing to do. + if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() == + L.getNumBlocks()) + return false; + + // Traverse blocks in Postorder to preserve the following invariant: by the + // moment when we process a block, all its successors already have correct + // loops set. + for (auto I = DFS.beginPostorder(), E = DFS.endPostorder(); I != E; ++I) { + BasicBlock *BB = *I; + // If this block stayed in current loop, skip it. + if (BlocksInLoopAfterFolding.count(BB)) + continue; + + // Collect immediate subloops. + bool NeedSubloopFixup = false; + Loop *BBL = LI.getLoopFor(BB); + if (BBL->getParentLoop() == &L && BBL->getHeader() == BB) + NeedSubloopFixup = true; + + // Find the most nested ancestor loop that is still BB's successor. BB + // also belongs to this loop. + Loop *StillReachable = nullptr; + for (BasicBlock *Succ : successors(BB)) { + Loop *SuccL = LI.getLoopFor(Succ); + while (SuccL && !SuccL->contains(L.getHeader())) + SuccL = SuccL->getParentLoop(); + if (SuccL) + if (!StillReachable || StillReachable->getLoopDepth() > + SuccL->getLoopDepth()) + StillReachable = SuccL; + } + + assert(StillReachable != &L && "Current loop should not be reachable!"); + // Finally, set the correct loop for BB. + if (BBL == &L) + LI.changeLoopFor(BB, StillReachable); + for (Loop *Current = &L; Current != StillReachable; + Current = Current->getParentLoop()) + Current->removeBlockFromLoop(BB); + if (NeedSubloopFixup) { + L.removeChildLoop(BBL); + if (StillReachable) + StillReachable->addChildLoop(BBL); + else + LI.addTopLevelLoop(BBL); + } + } + + return true; + } + + void fixupLCSSA() { + formLCSSA(L, DT, &LI, nullptr); + } + public: ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT) - : L(L), LI(LI), DT(DT) {} + : L(L), LI(LI), DT(DT), DFS(&L) {} bool run() { assert(L.getLoopLatch() && "Should be single latch!"); @@ -417,19 +474,6 @@ return false; } - // TODO: Support blocks that are not dead, but also not in loop after the - // folding. - if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() != - L.getNumBlocks()) { - LLVM_DEBUG( - dbgs() << "Give up constant terminator folding in loop " - << L.getHeader()->getName() - << ": we don't currently" - " support blocks that are not dead, but will stop " - "being a part of the loop after constant-folding.\n"); - return false; - } - // Dump analysis results. LLVM_DEBUG(dump()); @@ -440,6 +484,7 @@ // Make the actual transforms. handleDeadExits(); foldTerminators(); + bool NewExitsAppeared = fixupCurrentLoop(); if (!DeadLoopBlocks.empty()) { LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size() @@ -448,11 +493,15 @@ deleteDeadLoopBlocks(); } + if (NewExitsAppeared) + fixupLCSSA(); + #ifndef NDEBUG // Make sure that we have preserved all data structures after the transform. DT.verify(); assert(DT.isReachableFromEntry(L.getHeader())); LI.verify(DT); + assert(L.isRecursivelyLCSSAForm(DT, LI)); #endif return true; Index: test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll =================================================================== --- test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll +++ test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll @@ -12,16 +12,16 @@ ; CHECK-NEXT: preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_1:%.*]], [[HEADER_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 ; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] ; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: br label [[HEADER]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: br i1 false, label [[HEADER_BACKEDGE]], label [[EXIT:%.*]] +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2:%.*]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]] @@ -49,18 +49,16 @@ ; CHECK-NEXT: preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_1:%.*]], [[HEADER_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 ; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] ; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: br label [[HEADER]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: switch i32 1, label [[EXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2:%.*]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]] @@ -1583,7 +1581,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1591,8 +1589,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -1653,9 +1649,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: switch i32 1, label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1663,8 +1657,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -1725,7 +1717,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_2_BACKEDGE]] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1791,9 +1783,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: switch i32 1, label [[LOOP_2_BACKEDGE]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1863,7 +1853,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1871,8 +1861,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -1943,9 +1931,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: switch i32 1, label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1953,8 +1939,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -2025,7 +2009,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_2_BACKEDGE]] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -2101,9 +2085,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: switch i32 1, label [[LOOP_2_BACKEDGE]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]]