Index: lib/Transforms/Scalar/LoopSimplifyCFG.cpp =================================================================== --- lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -84,6 +84,7 @@ Loop &L; LoopInfo &LI; DominatorTree &DT; + LoopBlocksDFS DFS; // Whether or not the current loop will still exist after terminator constant // folding will be done. In theory, there are two ways how it can happen: @@ -141,7 +142,6 @@ /// Fill all information about status of blocks and exits of the current loop /// if constant folding of all branches will be done. void analyze() { - LoopBlocksDFS DFS(&L); DFS.perform(&LI); assert(DFS.isComplete() && "DFS is expected to be finished"); @@ -386,9 +386,67 @@ } } + bool fixupCurrentLoop() { + // If there is no blocks that are no longer in current loop, nothing to do. + if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() == + L.getNumBlocks()) + return false; + + // Traverse blocks in Postorder to preserve the following invariant: by the + // moment when we process a block, all its successors already have correct + // loops set. + for (auto I = DFS.beginPostorder(), E = DFS.endPostorder(); I != E; ++I) { + BasicBlock *BB = *I; + // If this block stayed in current loop, skip it. + if (BlocksInLoopAfterFolding.count(BB)) + continue; + + // If the block being processed is a header of a child loop, detatch it. + Loop *ChildLoop = nullptr; + if (LI.isLoopHeader(BB)) { + ChildLoop = LI.getLoopFor(BB); + if (ChildLoop->getParentLoop() != &L) + ChildLoop = nullptr; + else + L.removeChildLoop(ChildLoop); + } + + // Find the most nested ancestor loop that is still BB's successor. BB + // also belongs to this loop. + Loop *StillReachable = nullptr; + for (BasicBlock *Succ : successors(BB)) { + Loop *BBL = LI.getLoopFor(Succ); + if (BBL && BBL->contains(L.getHeader())) + if (!StillReachable || StillReachable->getLoopDepth() > + BBL->getLoopDepth()) + StillReachable = BBL; + } + + assert(StillReachable != &L && "Current loop should not be reachable!"); + // Reattach the child loop is needed. + if (ChildLoop) { + if (StillReachable) + StillReachable->addChildLoop(ChildLoop); + else + LI.addTopLevelLoop(ChildLoop); + } + // Finally, set the correct loop for BB. + LI.changeLoopFor(BB, StillReachable); + for (Loop *Current = &L; Current != StillReachable; + Current = Current->getParentLoop()) + Current->removeBlockFromLoop(BB); + } + + return true; + } + + void fixupLCSSA() { + formLCSSA(L, DT, &LI, nullptr); + } + public: ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT) - : L(L), LI(LI), DT(DT) {} + : L(L), LI(LI), DT(DT), DFS(&L) {} bool run() { assert(L.getLoopLatch() && "Should be single latch!"); @@ -417,19 +475,6 @@ return false; } - // TODO: Support blocks that are not dead, but also not in loop after the - // folding. - if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() != - L.getNumBlocks()) { - LLVM_DEBUG( - dbgs() << "Give up constant terminator folding in loop " - << L.getHeader()->getName() - << ": we don't currently" - " support blocks that are not dead, but will stop " - "being a part of the loop after constant-folding.\n"); - return false; - } - // Dump analysis results. LLVM_DEBUG(dump()); @@ -440,6 +485,7 @@ // Make the actual transforms. handleDeadExits(); foldTerminators(); + bool NewExitsAppeared = fixupCurrentLoop(); if (!DeadLoopBlocks.empty()) { LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size() @@ -448,11 +494,15 @@ deleteDeadLoopBlocks(); } + if (NewExitsAppeared) + fixupLCSSA(); + #ifndef NDEBUG // Make sure that we have preserved all data structures after the transform. DT.verify(); assert(DT.isReachableFromEntry(L.getHeader())); LI.verify(DT); + assert(L.isRecursivelyLCSSAForm(DT, LI)); #endif return true; Index: test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll =================================================================== --- test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll +++ test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll @@ -6,8 +6,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" -; CHECK-LABEL: In function dead_backedge_test_branch_loop: Give up constant terminator folding in loop header: we don't currently support blocks that are not dead, but will stop being a part of the loop after constant-folding. -; CHECK-LABEL: In function dead_backedge_test_switch_loop: Give up constant terminator folding in loop header: we don't currently support blocks that are not dead, but will stop being a part of the loop after constant-folding. +; CHECK-LABEL: In function dead_backedge_test_branch_loop: Constant terminator folding for loop Loop at depth 1 containing: %header
,%dead_backedge,%header.backedge +; CHECK: Constant-folding 1 terminators in loop header +; CHECK-LABEL: In function dead_backedge_test_switch_loop: Constant terminator folding for loop Loop at depth 1 containing: %header
,%dead_backedge,%header.backedge +; CHECK: Constant-folding 1 terminators in loop header ; CHECK-LABEL: In function dead_block_test_branch_loop: Constant terminator folding for loop Loop at depth 1 containing: %header
,%dead,%backedge ; CHECK: Deleting 1 dead blocks in loop header ; CHECK-LABEL: In function dead_block_test_switch_loop: Constant terminator folding for loop Loop at depth 1 containing: %header
,%dead,%backedge @@ -77,16 +79,16 @@ ; CHECK-NEXT: preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_1:%.*]], [[HEADER_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 ; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] ; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: br label [[HEADER]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: br i1 false, label [[HEADER_BACKEDGE]], label [[EXIT:%.*]] +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2:%.*]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]] @@ -114,18 +116,16 @@ ; CHECK-NEXT: preheader: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_1:%.*]], [[HEADER_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 ; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] ; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: br label [[HEADER]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: switch i32 1, label [[EXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2:%.*]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]]