Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -457,6 +457,7 @@ const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopTop( const MachineLoop &L, const BlockFilterSet &LoopBlockSet); + bool isBestPred(const MachineBasicBlock *Succ, const MachineBasicBlock *MBB); MachineBasicBlock *findBestLoopExit( const MachineLoop &L, const BlockFilterSet &LoopBlockSet); BlockFilterSet collectLoopBlockSet(const MachineLoop &L); @@ -1865,11 +1866,32 @@ return BestPred; } +/// Check if MBB is the best predecessor of block Succ. +bool +MachineBlockPlacement::isBestPred(const MachineBasicBlock *Succ, + const MachineBasicBlock *MBB) { + auto Prob = MBPI->getEdgeProbability(MBB, Succ); + BlockFrequency MaxFreq = MBFI->getBlockFreq(MBB) * Prob; + + for (MachineBasicBlock *Pred : Succ->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + // Pred can be placed before Succ if Pred is not in any chain, + // or it is at the end of some chain. + if (!PredChain || Pred == *std::prev(PredChain->end())) { + Prob = MBPI->getEdgeProbability(Pred, Succ); + BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * Prob; + if (EdgeFreq > MaxFreq) + return false; + } + } + return true; +} + /// Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best -/// block to layout at the top of the loop. Typically this is done to maximize -/// fallthrough opportunities. +/// block to layout at the bottom of the loop. Typically this is done to +/// maximize fallthrough opportunities. MachineBasicBlock * MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { @@ -1937,6 +1959,9 @@ BlocksExitingToOuterLoop.insert(MBB); } + if (!isBestPred(Succ, MBB)) + continue; + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " << getBlockName(Succ) << " [L:" << SuccLoopDepth Index: test/CodeGen/X86/bb_rotate.ll =================================================================== --- test/CodeGen/X86/bb_rotate.ll +++ test/CodeGen/X86/bb_rotate.ll @@ -48,6 +48,58 @@ ret i1 %result } +; Function @no_rotate is same as @no_viable_top_fallthrough. The only difference +; is profile, so the block %.exit should be placed after %.bb1 instead of +; rotating the loop. +define i1 @no_rotate() { +; CHECK-LABEL: no_rotate +; CHECK: %.entry +; CHECK: %.bb1 +; CHECK: %.exit +; CHECK: %.stop +; CHECK: %.header +; CHECK: %.bb2 +; CHECK: %.middle +; CHECK: %.backedge +; CHECK: %.bb3 +.entry: + %val1 = call i1 @foo() + br i1 %val1, label %.bb1, label %.header, !prof !10 + +.bb1: + %val2 = call i1 @foo() + br i1 %val2, label %.stop, label %.exit, !prof !11 + +.header: + %val3 = call i1 @foo() + br i1 %val3, label %.bb2, label %.exit + +.bb2: + %val4 = call i1 @foo() + br i1 %val4, label %.middle, label %.bb3, !prof !10 + +.middle: + %val5 = call i1 @foo() + br i1 %val5, label %.header, label %.backedge + +.backedge: + %val6 = call i1 @foo() + br label %.header + +.bb3: + %val7 = call i1 @foo() + br label %.middle + +.exit: + %val8 = call i1 @foo() + br label %.stop + +.stop: + %result = call i1 @foo() + ret i1 %result +} + declare i1 @foo() !10 = !{!"branch_weights", i32 90, i32 10} +!11 = !{!"branch_weights", i32 10, i32 90}