Index: llvm/include/llvm/Analysis/LoopInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/LoopInfoImpl.h +++ llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -211,7 +211,7 @@ BlockT *Header = getHeader(); BlockT *Latch = nullptr; for (const auto Pred : children>(Header)) { - if (contains(Pred)) { + if (Latch != Pred && contains(Pred)) { if (Latch) return nullptr; Latch = Pred; Index: llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp =================================================================== --- llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -197,6 +197,7 @@ // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). + unsigned NumEdges = 1; if (Options.MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; @@ -206,6 +207,7 @@ // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); + ++NumEdges; } } @@ -278,7 +280,8 @@ // Update LCSSA form in the newly created exit block. if (Options.PreserveLCSSA) { - createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); + SmallVector Preds(NumEdges, TIBB); + createPHIsForSplitLoopExit(Preds, NewBB, DestBB); } // The only that we can break LoopSimplify form by splitting a critical Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -216,8 +216,8 @@ BasicBlock *OrigHeader = L->getHeader(); BasicBlock *OrigLatch = L->getLoopLatch(); - BranchInst *BI = dyn_cast(OrigHeader->getTerminator()); - if (!BI || BI->isUnconditional()) + Instruction *Term = OrigHeader->getTerminator(); + if (!isa(Term) && !isa(Term)) return false; // If the loop header is not one of the loop exiting blocks then @@ -237,6 +237,25 @@ !shouldRotateLoopExitingLatch(L)) return false; + // Find new Loop header. NewHeader is a Header's one and only successor + // that is inside loop. Header's other successor is outside the + // loop. Otherwise loop is not suitable for rotation. + BasicBlock *Exit = nullptr; + BasicBlock *NewHeader = nullptr; + for (BasicBlock *SuccBB : successors(OrigHeader)) { + if (L->contains(SuccBB)) { + // We don't have a new unique header. + if (NewHeader && NewHeader != SuccBB) + return false; + NewHeader = SuccBB; + } else { + // TODO: In principle, we could also support multiple exits here. + if (Exit && Exit != SuccBB) + return false; + Exit = SuccBB; + } + } + // Check size of original header and reject loop if it is very big or we can't // duplicate blocks inside it. { @@ -282,20 +301,9 @@ if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); - // Find new Loop header. NewHeader is a Header's one and only successor - // that is inside loop. Header's other successor is outside the - // loop. Otherwise loop is not suitable for rotation. - BasicBlock *Exit = BI->getSuccessor(0); - BasicBlock *NewHeader = BI->getSuccessor(1); - if (L->contains(Exit)) - std::swap(Exit, NewHeader); - assert(NewHeader && "Unable to determine new loop header"); - assert(L->contains(NewHeader) && !L->contains(Exit) && - "Unable to determine loop header and exit blocks"); - // This code assumes that the new header has exactly one predecessor. // Remove any single-entry PHI nodes in it. - assert(NewHeader->getSinglePredecessor() && + assert(NewHeader->getUniquePredecessor() && "New header doesn't have one pred!"); FoldSingleEntryPHINodes(NewHeader); @@ -450,11 +458,18 @@ // then we fold away the cond branch to an uncond branch. This simplifies the // loop in cases important for nested loops, and it also means we don't have // to split as many edges. - BranchInst *PHBI = cast(OrigPreheader->getTerminator()); - assert(PHBI->isConditional() && "Should be clone of BI condbr!"); - if (!isa(PHBI->getCondition()) || - PHBI->getSuccessor(cast(PHBI->getCondition())->isZero()) != - NewHeader) { + bool AlwaysBranchesIntoLoop = false; + Instruction *PHTerm = OrigPreheader->getTerminator(); + if (auto *PHBI = dyn_cast(PHTerm)) { + if (auto *Cond = dyn_cast(PHBI->getCondition())) + AlwaysBranchesIntoLoop = PHBI->getSuccessor(Cond->isZero()) == NewHeader; + } else if (auto *PHSI = dyn_cast(PHTerm)) { + if (auto *Cond = dyn_cast(PHSI->getCondition())) + AlwaysBranchesIntoLoop = + PHSI->findCaseValue(Cond)->getCaseSuccessor() == NewHeader; + } + + if (!AlwaysBranchesIntoLoop) { // The conditional branch can't be folded, handle the general case. // Split edges as necessary to preserve LoopSimplify form. @@ -463,14 +478,15 @@ // Split the edge to form a real preheader. BasicBlock *NewPH = SplitCriticalEdge( OrigPreheader, NewHeader, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); + CriticalEdgeSplittingOptions(DT, LI, MSSAU) + .setPreserveLCSSA().setMergeIdenticalEdges()); NewPH->setName(NewHeader->getName() + ".lr.ph"); // Preserve canonical loop form, which means that 'Exit' should have only // one predecessor. Note that Exit could be an exit block for multiple // nested loops, causing both of the edges to now be critical and need to // be split. - SmallVector ExitPreds(pred_begin(Exit), pred_end(Exit)); + SmallSetVector ExitPreds(pred_begin(Exit), pred_end(Exit)); bool SplitLatchEdge = false; for (BasicBlock *ExitPred : ExitPreds) { // We only need to split loop exit edges. @@ -481,18 +497,28 @@ SplitLatchEdge |= L->getLoopLatch() == ExitPred; BasicBlock *ExitSplit = SplitCriticalEdge( ExitPred, Exit, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); + CriticalEdgeSplittingOptions(DT, LI, MSSAU) + .setPreserveLCSSA().setMergeIdenticalEdges()); ExitSplit->moveBefore(Exit); } assert(SplitLatchEdge && "Despite splitting all preds, failed to split latch exit?"); } else { // We can fold the conditional branch in the preheader, this makes things - // simpler. The first step is to remove the extra edge to the Exit block. - Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); - NewBI->setDebugLoc(PHBI->getDebugLoc()); - PHBI->eraseFromParent(); + // simpler. The first step is to remove extra edges to the Exit and + // NewHeader blocks. Only keep one NewHeader edge. + bool SeenNewHeader = false; + for (BasicBlock *SuccBB : successors(OrigPreheader)) { + if (SuccBB == NewHeader && !SeenNewHeader) { + SeenNewHeader = true; + continue; + } + SuccBB->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); + } + + BranchInst *NewBI = BranchInst::Create(NewHeader, PHTerm); + NewBI->setDebugLoc(PHTerm->getDebugLoc()); + PHTerm->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. if (DT) DT->deleteEdge(OrigPreheader, Exit); Index: llvm/test/Transforms/LoopRotate/switch.ll =================================================================== --- llvm/test/Transforms/LoopRotate/switch.ll +++ llvm/test/Transforms/LoopRotate/switch.ll @@ -4,21 +4,19 @@ define i64 @switch_multi_entry_known_entry() { ; CHECK-LABEL: @switch_multi_entry_known_entry( ; CHECK-NEXT: start: -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[STATE:%.*]] = phi i8 [ 2, [[START:%.*]] ], [ [[NEXT_STATE:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[COUNT:%.*]] = phi i64 [ 0, [[START]] ], [ [[INC:%.*]], [[LATCH]] ] -; CHECK-NEXT: switch i8 [[STATE]], label [[EXIT:%.*]] [ +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[COUNT2:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[INC:%.*]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] +; CHECK-NEXT: [[COUNT1:%.*]] = phi i64 [ 0, [[START]] ], [ [[INC]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT2]], 999 +; CHECK-NEXT: [[NEXT_STATE:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: [[INC]] = add i64 [[COUNT1]], 1 +; CHECK-NEXT: switch i8 [[NEXT_STATE]], label [[EXIT:%.*]] [ ; CHECK-NEXT: i8 0, label [[LATCH]] ; CHECK-NEXT: i8 2, label [[LATCH]] ; CHECK-NEXT: ] -; CHECK: latch: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT]], 999 -; CHECK-NEXT: [[NEXT_STATE]] = zext i1 [[CMP]] to i8 -; CHECK-NEXT: [[INC]] = add i64 [[COUNT]], 1 -; CHECK-NEXT: br label [[HEADER]] ; CHECK: exit: -; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[HEADER]] ] +; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[INC]], [[LATCH]] ] ; CHECK-NEXT: ret i64 [[COUNT_LCSSA]] ; start: @@ -45,21 +43,27 @@ define i64 @switch_multi_entry_unknown_entry(i8 %start_state) { ; CHECK-LABEL: @switch_multi_entry_unknown_entry( ; CHECK-NEXT: start: -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[STATE:%.*]] = phi i8 [ [[START_STATE:%.*]], [[START:%.*]] ], [ [[NEXT_STATE:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[COUNT:%.*]] = phi i64 [ 0, [[START]] ], [ [[INC:%.*]], [[LATCH]] ] -; CHECK-NEXT: switch i8 [[STATE]], label [[EXIT:%.*]] [ +; CHECK-NEXT: switch i8 [[START_STATE:%.*]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i8 0, label [[LATCH_LR_PH:%.*]] +; CHECK-NEXT: i8 2, label [[LATCH_LR_PH]] +; CHECK-NEXT: ] +; CHECK: latch.lr.ph: +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[COUNT2:%.*]] = phi i64 [ 0, [[LATCH_LR_PH]] ], [ [[INC:%.*]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] +; CHECK-NEXT: [[COUNT1:%.*]] = phi i64 [ 0, [[LATCH_LR_PH]] ], [ [[INC]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT2]], 999 +; CHECK-NEXT: [[NEXT_STATE:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: [[INC]] = add i64 [[COUNT1]], 1 +; CHECK-NEXT: switch i8 [[NEXT_STATE]], label [[HEADER_EXIT_CRIT_EDGE:%.*]] [ ; CHECK-NEXT: i8 0, label [[LATCH]] ; CHECK-NEXT: i8 2, label [[LATCH]] ; CHECK-NEXT: ] -; CHECK: latch: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT]], 999 -; CHECK-NEXT: [[NEXT_STATE]] = zext i1 [[CMP]] to i8 -; CHECK-NEXT: [[INC]] = add i64 [[COUNT]], 1 -; CHECK-NEXT: br label [[HEADER]] +; CHECK: header.exit_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i64 [ [[INC]], [[LATCH]] ] +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[HEADER]] ] +; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[SPLIT]], [[HEADER_EXIT_CRIT_EDGE]] ], [ 0, [[START:%.*]] ] ; CHECK-NEXT: ret i64 [[COUNT_LCSSA]] ; start: @@ -86,21 +90,18 @@ define i64 @switch_multi_exit_known_entry() { ; CHECK-LABEL: @switch_multi_exit_known_entry( ; CHECK-NEXT: start: -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[STATE:%.*]] = phi i8 [ 0, [[START:%.*]] ], [ [[NEXT_STATE:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[COUNT:%.*]] = phi i64 [ 0, [[START]] ], [ [[INC:%.*]], [[LATCH]] ] -; CHECK-NEXT: switch i8 [[STATE]], label [[LATCH]] [ +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[COUNT1:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[INC:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT1]], 999 +; CHECK-NEXT: [[NEXT_STATE:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: [[INC]] = add i64 [[COUNT1]], 1 +; CHECK-NEXT: switch i8 [[NEXT_STATE]], label [[LATCH]] [ ; CHECK-NEXT: i8 1, label [[EXIT:%.*]] ; CHECK-NEXT: i8 2, label [[EXIT]] ; CHECK-NEXT: ] -; CHECK: latch: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT]], 999 -; CHECK-NEXT: [[NEXT_STATE]] = zext i1 [[CMP]] to i8 -; CHECK-NEXT: [[INC]] = add i64 [[COUNT]], 1 -; CHECK-NEXT: br label [[HEADER]] ; CHECK: exit: -; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[HEADER]] ], [ [[COUNT]], [[HEADER]] ] +; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[INC]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] ; CHECK-NEXT: ret i64 [[COUNT_LCSSA]] ; start: @@ -127,21 +128,26 @@ define i64 @switch_multi_exit_unknown_entry(i8 %start_state) { ; CHECK-LABEL: @switch_multi_exit_unknown_entry( ; CHECK-NEXT: start: -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[STATE:%.*]] = phi i8 [ [[START_STATE:%.*]], [[START:%.*]] ], [ [[NEXT_STATE:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[COUNT:%.*]] = phi i64 [ 0, [[START]] ], [ [[INC:%.*]], [[LATCH]] ] -; CHECK-NEXT: switch i8 [[STATE]], label [[LATCH]] [ +; CHECK-NEXT: switch i8 [[START_STATE:%.*]], label [[LATCH_LR_PH:%.*]] [ ; CHECK-NEXT: i8 1, label [[EXIT:%.*]] ; CHECK-NEXT: i8 2, label [[EXIT]] ; CHECK-NEXT: ] +; CHECK: latch.lr.ph: +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT]], 999 -; CHECK-NEXT: [[NEXT_STATE]] = zext i1 [[CMP]] to i8 -; CHECK-NEXT: [[INC]] = add i64 [[COUNT]], 1 -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: [[COUNT1:%.*]] = phi i64 [ 0, [[LATCH_LR_PH]] ], [ [[INC:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[COUNT1]], 999 +; CHECK-NEXT: [[NEXT_STATE:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: [[INC]] = add i64 [[COUNT1]], 1 +; CHECK-NEXT: switch i8 [[NEXT_STATE]], label [[LATCH]] [ +; CHECK-NEXT: i8 1, label [[HEADER_EXIT_CRIT_EDGE:%.*]] +; CHECK-NEXT: i8 2, label [[HEADER_EXIT_CRIT_EDGE]] +; CHECK-NEXT: ] +; CHECK: header.exit_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i64 [ [[INC]], [[LATCH]] ], [ [[INC]], [[LATCH]] ] +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[HEADER]] ], [ [[COUNT]], [[HEADER]] ] +; CHECK-NEXT: [[COUNT_LCSSA:%.*]] = phi i64 [ [[SPLIT]], [[HEADER_EXIT_CRIT_EDGE]] ], [ 0, [[START:%.*]] ], [ 0, [[START]] ] ; CHECK-NEXT: ret i64 [[COUNT_LCSSA]] ; start: