Index: llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -418,6 +418,92 @@ } } + /// Update loop info for blocks from the original loop that remain live but + /// are no longer a part of the current loop after the folding. + void updateLiveBlocksInfo() { + // If there is no blocks that are no longer in current loop, nothing to do. + if (BlocksInLoopAfterFolding.size() == LiveLoopBlocks.size()) + return; + + L.getHeader()->getParent()->dump(); + assert(DFS.isComplete() && "Must be!"); + + // Block needs update if will be live but will not be a part of the current + // loop after folding will be done. + auto NeedsUpdate = [&](BasicBlock *BB) { + return LiveLoopBlocks.count(BB) && !BlocksInLoopAfterFolding.count(BB); + }; + SmallPtrSet Visited; + // Traverse blocks in Postorder to preserve the following invariant: by the + // moment when we process a block, all its successors already have correct + // loops set. + for (auto I = DFS.beginPostorder(), E = DFS.endPostorder(); I != E; ++I) { + BasicBlock *BB = *I; + if (!NeedsUpdate(BB) || Visited.count(BB)) + continue; + + // The entire child subloop will be handled together with its header. + Loop *BBL = LI.getLoopFor(BB); + bool ProcessingChildLoop = (BBL != &L); + if (ProcessingChildLoop) + if (!LI.isLoopHeader(BB)) + continue; + + // Now we process BB. If BB is a loop header, we also process all blocks + // from this loop. To identify which loop contains BB now, look at its + // successors and find innermost loop among their containing loops. + SmallVector BlocksToProcess; + SmallPtrSet Successors; + if (ProcessingChildLoop) { + for (auto *B : BBL->blocks()) { + Visited.insert(B).second; + BlocksToProcess.push_back(B); + } + } else { + Visited.insert(BB); + BlocksToProcess.push_back(BB); + } + + for (BasicBlock *B : BlocksToProcess) + for (BasicBlock *Succ : successors(B)) + Successors.insert(Succ); + // Exclude successors for which we haven't figured out the right loop. + for (BasicBlock *B : BlocksToProcess) + Successors.erase(B); + +#ifndef NDEBUG + // Make sure that all in-loop successors of the block(s) we are handling + // now have already been processed by this moment. + for (BasicBlock *Succ : Successors) + if (NeedsUpdate(Succ)) + assert(Visited.count(Succ) && "Bad order!"); +#endif + // Find the most nested ancestor loop that is still BB's successor. BB + // also belongs to this loop. + Loop *ReachableOuterLoop = getInnermostLoopFor(Successors, L, LI); + for (BasicBlock *B : BlocksToProcess) { + // Remove blocks from loops that no longer contain it. + if (!ProcessingChildLoop) + LI.changeLoopFor(B, ReachableOuterLoop); + removeBlockFromLoops(B, &L, ReachableOuterLoop); + } + + // If we are processing a child loop, reattach it to new parent. + if (ProcessingChildLoop) { + assert(BBL->getParentLoop() == &L && "Should be!"); + L.removeChildLoop(BBL); + if (ReachableOuterLoop) + ReachableOuterLoop->addChildLoop(BBL); + else + LI.addTopLevelLoop(BBL); + } + } + + // We have new exit blocks that have not been exit blocks before. Build + // LCSSA form for them. + formLCSSA(L, DT, &LI, nullptr); + } + /// Delete loop blocks that have become unreachable after folding. Make all /// relevant updates to DT and LI. void deleteDeadLoopBlocks() { @@ -553,19 +639,6 @@ return false; } - // TODO: Support blocks that are not dead, but also not in loop after the - // folding. - if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() != - L.getNumBlocks()) { - LLVM_DEBUG( - dbgs() << "Give up constant terminator folding in loop " - << L.getHeader()->getName() - << ": we don't currently" - " support blocks that are not dead, but will stop " - "being a part of the loop after constant-folding.\n"); - return false; - } - SE.forgetTopmostLoop(&L); // Dump analysis results. LLVM_DEBUG(dump()); @@ -577,6 +650,7 @@ // Make the actual transforms. handleDeadExits(); foldTerminators(); + updateLiveBlocksInfo(); if (!DeadLoopBlocks.empty()) { LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size() Index: llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll =================================================================== --- llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll +++ llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll @@ -12,16 +12,16 @@ ; CHECK-NEXT: preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_1:%.*]], [[HEADER_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 ; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] ; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: br label [[HEADER]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: br i1 false, label [[HEADER_BACKEDGE]], label [[EXIT:%.*]] +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2:%.*]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]] @@ -49,18 +49,16 @@ ; CHECK-NEXT: preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_1:%.*]], [[HEADER_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 ; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] ; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: br label [[HEADER]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: switch i32 1, label [[EXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2:%.*]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]] @@ -1583,7 +1581,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1591,8 +1589,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -1653,9 +1649,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: switch i32 1, label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1663,8 +1657,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -1725,7 +1717,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_2_BACKEDGE]] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1791,9 +1783,7 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: -; CHECK-NEXT: switch i32 1, label [[LOOP_2_BACKEDGE]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1863,7 +1853,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1871,8 +1861,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -1943,9 +1931,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: switch i32 1, label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -1953,8 +1939,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -2025,7 +2009,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_2_BACKEDGE]] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -2101,9 +2085,7 @@ ; CHECK: intermediate_loop: ; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: switch i32 1, label [[LOOP_2_BACKEDGE]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -2183,7 +2165,7 @@ ; CHECK: intermediate_block: ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_2_BACKEDGE]] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -2266,9 +2248,7 @@ ; CHECK: intermediate_block: ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: switch i32 1, label [[LOOP_2_BACKEDGE]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -2352,7 +2332,7 @@ ; CHECK: intermediate_block: ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -2360,8 +2340,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: @@ -2439,9 +2417,7 @@ ; CHECK: intermediate_block: ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: -; CHECK-NEXT: switch i32 1, label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_3_backedge: ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 ; CHECK-NEXT: br i1 [[COND2]], label [[LOOP_3]], label [[LOOP_2_BACKEDGE]] @@ -2449,8 +2425,6 @@ ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 [[J_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_2]], label [[LOOP_1_BACKEDGE_LOOPEXIT1:%.*]] -; CHECK: loop_1_backedge.loopexit: -; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge.loopexit1: ; CHECK-NEXT: br label [[LOOP_1_BACKEDGE]] ; CHECK: loop_1_backedge: