Index: llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp =================================================================== --- llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -241,6 +241,7 @@ // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). + unsigned NumEdges = 1; if (Options.MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; @@ -250,6 +251,7 @@ // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); + ++NumEdges; } } @@ -321,7 +323,8 @@ // Update LCSSA form in the newly created exit block. if (Options.PreserveLCSSA) { - createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); + SmallVector Preds(NumEdges, TIBB); + createPHIsForSplitLoopExit(Preds, NewBB, DestBB); } if (!LoopPreds.empty()) { Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -189,14 +189,8 @@ // Assuming both header and latch are exiting, look for a phi which is only // used outside the loop (via a LCSSA phi) in the exit from the header. // This means that rotating the loop can remove the phi. -static bool profitableToRotateLoopExitingLatch(Loop *L) { - BasicBlock *Header = L->getHeader(); - BranchInst *BI = dyn_cast(Header->getTerminator()); - assert(BI && BI->isConditional() && "need header with conditional exit"); - BasicBlock *HeaderExit = BI->getSuccessor(0); - if (L->contains(HeaderExit)) - HeaderExit = BI->getSuccessor(1); - +static bool profitableToRotateLoopExitingLatch(BasicBlock *Header, + BasicBlock *HeaderExit) { for (auto &Phi : Header->phis()) { // Look for uses of this phi in the loop/via exits other than the header. if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) { @@ -275,8 +269,8 @@ BasicBlock *OrigHeader = L->getHeader(); BasicBlock *OrigLatch = L->getLoopLatch(); - BranchInst *BI = dyn_cast(OrigHeader->getTerminator()); - if (!BI || BI->isUnconditional()) + Instruction *Term = OrigHeader->getTerminator(); + if (!isa(Term) && !isa(Term)) return Rotated; // If the loop header is not one of the loop exiting blocks then @@ -290,10 +284,30 @@ if (!OrigLatch) return Rotated; + // Find new Loop header. NewHeader is OrigHeader's one and only successor + // that is inside the loop. OrigHeader's other successor is outside the + // loop. Otherwise the loop is not suitable for rotation. + BasicBlock *Exit = nullptr; + BasicBlock *NewHeader = nullptr; + for (BasicBlock *SuccBB : successors(OrigHeader)) { + if (L->contains(SuccBB)) { + // We don't have a new unique header. + // TODO: Allow multi-edge to NewHeader. + if (NewHeader) + return Rotated; + NewHeader = SuccBB; + } else { + // TODO: Support multiple exits. + if (Exit && Exit != SuccBB) + return Rotated; + Exit = SuccBB; + } + } + // Rotate if either the loop latch does *not* exit the loop, or if the loop // latch was just simplified. Or if we think it will be profitable. if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && - !profitableToRotateLoopExitingLatch(L) && + !profitableToRotateLoopExitingLatch(OrigHeader, Exit) && !canRotateDeoptimizingLatchExit(L)) return Rotated; @@ -354,20 +368,9 @@ if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); - // Find new Loop header. NewHeader is a Header's one and only successor - // that is inside loop. Header's other successor is outside the - // loop. Otherwise loop is not suitable for rotation. - BasicBlock *Exit = BI->getSuccessor(0); - BasicBlock *NewHeader = BI->getSuccessor(1); - if (L->contains(Exit)) - std::swap(Exit, NewHeader); - assert(NewHeader && "Unable to determine new loop header"); - assert(L->contains(NewHeader) && !L->contains(Exit) && - "Unable to determine loop header and exit blocks"); - // This code assumes that the new header has exactly one predecessor. // Remove any single-entry PHI nodes in it. - assert(NewHeader->getSinglePredecessor() && + assert(NewHeader->getUniquePredecessor() && "New header doesn't have one pred!"); FoldSingleEntryPHINodes(NewHeader); @@ -594,11 +597,18 @@ // then we fold away the cond branch to an uncond branch. This simplifies the // loop in cases important for nested loops, and it also means we don't have // to split as many edges. - BranchInst *PHBI = cast(OrigPreheader->getTerminator()); - assert(PHBI->isConditional() && "Should be clone of BI condbr!"); - if (!isa(PHBI->getCondition()) || - PHBI->getSuccessor(cast(PHBI->getCondition())->isZero()) != - NewHeader) { + bool AlwaysBranchesIntoLoop = false; + Instruction *PHTerm = OrigPreheader->getTerminator(); + if (auto *PHBI = dyn_cast(PHTerm)) { + if (auto *Cond = dyn_cast(PHBI->getCondition())) + AlwaysBranchesIntoLoop = PHBI->getSuccessor(Cond->isZero()) == NewHeader; + } else if (auto *PHSI = dyn_cast(PHTerm)) { + if (auto *Cond = dyn_cast(PHSI->getCondition())) + AlwaysBranchesIntoLoop = + PHSI->findCaseValue(Cond)->getCaseSuccessor() == NewHeader; + } + + if (!AlwaysBranchesIntoLoop) { // The conditional branch can't be folded, handle the general case. // Split edges as necessary to preserve LoopSimplify form. @@ -606,15 +616,16 @@ // thus is not a preheader anymore. // Split the edge to form a real preheader. BasicBlock *NewPH = SplitCriticalEdge( - OrigPreheader, NewHeader, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); + OrigPreheader, NewHeader, CriticalEdgeSplittingOptions(DT, LI, MSSAU) + .setPreserveLCSSA().setMergeIdenticalEdges()); NewPH->setName(NewHeader->getName() + ".lr.ph"); // Preserve canonical loop form, which means that 'Exit' should have only // one predecessor. Note that Exit could be an exit block for multiple // nested loops, causing both of the edges to now be critical and need to // be split. - SmallVector ExitPreds(pred_begin(Exit), pred_end(Exit)); + SmallSetVector ExitPreds(pred_begin(Exit), + pred_end(Exit)); bool SplitLatchEdge = false; for (BasicBlock *ExitPred : ExitPreds) { // We only need to split loop exit edges. @@ -624,19 +635,28 @@ continue; SplitLatchEdge |= L->getLoopLatch() == ExitPred; BasicBlock *ExitSplit = SplitCriticalEdge( - ExitPred, Exit, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); + ExitPred, Exit, CriticalEdgeSplittingOptions(DT, LI, MSSAU) + .setPreserveLCSSA().setMergeIdenticalEdges()); ExitSplit->moveBefore(Exit); } assert(SplitLatchEdge && "Despite splitting all preds, failed to split latch exit?"); } else { // We can fold the conditional branch in the preheader, this makes things - // simpler. The first step is to remove the extra edge to the Exit block. - Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); - NewBI->setDebugLoc(PHBI->getDebugLoc()); - PHBI->eraseFromParent(); + // simpler. The first step is to remove extra edges to the Exit and + // NewHeader blocks. Only keep one NewHeader edge. + bool SeenNewHeader = false; + for (BasicBlock *SuccBB : successors(OrigPreheader)) { + if (SuccBB == NewHeader && !SeenNewHeader) { + SeenNewHeader = true; + continue; + } + SuccBB->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); + } + + BranchInst *NewBI = BranchInst::Create(NewHeader, PHTerm); + NewBI->setDebugLoc(PHTerm->getDebugLoc()); + PHTerm->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. if (DT) DT->deleteEdge(OrigPreheader, Exit); Index: llvm/test/Transforms/LoopRotate/switch.ll =================================================================== --- llvm/test/Transforms/LoopRotate/switch.ll +++ llvm/test/Transforms/LoopRotate/switch.ll @@ -86,21 +86,18 @@ define i64 @switch_multi_exit_known_entry() { ; CHECK-LABEL: @switch_multi_exit_known_entry( ; CHECK-NEXT: start: -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[STATE:%.*]] = phi i8 [ 0, [[START:%.*]] ], [ [[NEXT_STATE:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[COUNT:%.*]] = phi i64 [ 0, [[START]] ], [ [[INC:%.*]], [[LATCH]] ] -; CHECK-NEXT: switch i8 [[STATE]], label [[LATCH]] [ +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[COUNT1:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[INC:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT1]], 999 +; CHECK-NEXT: [[NEXT_STATE:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: [[INC]] = add i64 [[COUNT1]], 1 +; CHECK-NEXT: switch i8 [[NEXT_STATE]], label [[LATCH]] [ ; CHECK-NEXT: i8 1, label [[EXIT:%.*]] ; CHECK-NEXT: i8 2, label [[EXIT]] ; CHECK-NEXT: ] -; CHECK: latch: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT]], 999 -; CHECK-NEXT: [[NEXT_STATE]] = zext i1 [[CMP]] to i8 -; CHECK-NEXT: [[INC]] = add i64 [[COUNT]], 1 -; CHECK-NEXT: br label [[HEADER]] ; CHECK: exit: -; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[HEADER]] ], [ [[COUNT]], [[HEADER]] ] +; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[INC]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] ; CHECK-NEXT: ret i64 [[COUNT_LCSSA]] ; start: @@ -127,21 +124,26 @@ define i64 @switch_multi_exit_unknown_entry(i8 %start_state) { ; CHECK-LABEL: @switch_multi_exit_unknown_entry( ; CHECK-NEXT: start: -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[STATE:%.*]] = phi i8 [ [[START_STATE:%.*]], [[START:%.*]] ], [ [[NEXT_STATE:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[COUNT:%.*]] = phi i64 [ 0, [[START]] ], [ [[INC:%.*]], [[LATCH]] ] -; CHECK-NEXT: switch i8 [[STATE]], label [[LATCH]] [ +; CHECK-NEXT: switch i8 [[START_STATE:%.*]], label [[LATCH_LR_PH:%.*]] [ ; CHECK-NEXT: i8 1, label [[EXIT:%.*]] ; CHECK-NEXT: i8 2, label [[EXIT]] ; CHECK-NEXT: ] +; CHECK: latch.lr.ph: +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT]], 999 -; CHECK-NEXT: [[NEXT_STATE]] = zext i1 [[CMP]] to i8 -; CHECK-NEXT: [[INC]] = add i64 [[COUNT]], 1 -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: [[COUNT1:%.*]] = phi i64 [ 0, [[LATCH_LR_PH]] ], [ [[INC:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT1]], 999 +; CHECK-NEXT: [[NEXT_STATE:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: [[INC]] = add i64 [[COUNT1]], 1 +; CHECK-NEXT: switch i8 [[NEXT_STATE]], label [[LATCH]] [ +; CHECK-NEXT: i8 1, label [[HEADER_EXIT_CRIT_EDGE:%.*]] +; CHECK-NEXT: i8 2, label [[HEADER_EXIT_CRIT_EDGE]] +; CHECK-NEXT: ] +; CHECK: header.exit_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i64 [ [[INC]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[HEADER]] ], [ [[COUNT]], [[HEADER]] ] +; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[SPLIT]], [[HEADER_EXIT_CRIT_EDGE]] ], [ 0, [[START:%.*]] ], [ 0, [[START]] ] ; CHECK-NEXT: ret i64 [[COUNT_LCSSA]] ; start: