Index: lib/CodeGen/MachineSink.cpp =================================================================== --- lib/CodeGen/MachineSink.cpp +++ lib/CodeGen/MachineSink.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -41,6 +42,12 @@ cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); +static cl::opt +UseBlockFreqInfo("machine-sink-bfi", + cl::desc("Use block frequency info to find successors to sink"), + cl::init(true), cl::Hidden); + + STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); @@ -53,6 +60,7 @@ MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; + const MachineBlockFrequencyInfo *MBFI; AliasAnalysis *AA; // Remember which edges have been considered for breaking. @@ -81,6 +89,8 @@ AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + if (UseBlockFreqInfo) + AU.addRequired(); } void releaseMemory() override { @@ -247,6 +257,7 @@ DT = &getAnalysis(); PDT = &getAnalysis(); LI = &getAnalysis(); + MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; AA = &getAnalysis(); bool EverMadeChange = false; @@ -566,14 +577,20 @@ } // Otherwise, we should look at all the successors and decide which one - // we should sink to. - // We give successors with smaller loop depth higher priority. - SmallVector Succs(MBB->succ_begin(), MBB->succ_end()); - // Sort Successors according to their loop depth. + // we should sink to. If we have reliable block frequency information + // (frequency != 0) available, give successors with smaller frequencies + // higher priority, otherwise prioritize smaller loop depths. + SmallVector Succs(MBB->succ_begin(), + MBB->succ_end()); + // Sort Successors according to their loop depth or block frequency info. std::stable_sort( Succs.begin(), Succs.end(), - [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) { - return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { + uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; + uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; + bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; + return HasBlockFreq ? LHSFreq < RHSFreq + : LI->getLoopDepth(L) < LI->getLoopDepth(R); }); for (SmallVectorImpl::iterator SI = Succs.begin(), E = Succs.end(); SI != E; ++SI) { Index: test/CodeGen/X86/sink-blockfreq.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/sink-blockfreq.ll @@ -0,0 +1,45 @@ +; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI +; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI + +; Test that by changing BlockFrequencyInfo we change the order in which +; machine-sink looks for sucessor blocks. By not using BFI, both G and B +; have the same loop depth and no instructions is sinked - B is selected but +; can't be used as to avoid breaking a non profitable critical edge. By using +; BFI, "mul" is sinked into the less frequent block G. +define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp { +; MSINK_BFI-LABEL: sink_freqinfo +; MSINK_BFI: jl +; MSINK_BFI-NEXT: ## BB# +; MSINK_BFI-NEXT: imull + +; MSINK_NOBFI-LABEL: sink_freqinfo +; MSINK_NOBFI: imull +; MSINK_NOBFI: jl +entry: + br label %B + +B: + %ee = phi i32 [ 0, %entry ], [ %inc, %F ] + %xx = sub i32 %a, %ee + %cond0 = icmp slt i32 %xx, 0 + br i1 %cond0, label %F, label %exit, !prof !0 + +F: + %inc = add nsw i32 %xx, 2 + %aa = mul nsw i32 %b, %inc + %exitcond = icmp slt i32 %inc, %a + br i1 %exitcond, label %B, label %G, !prof !1 + +G: + %ii = add nsw i32 %aa, %a + %ll = add i32 %b, 45 + %exitcond2 = icmp sge i32 %ii, %b + br i1 %exitcond2, label %G, label %exit, !prof !2 + +exit: + ret i32 0 +} + +!0 = metadata !{metadata !"branch_weights", i32 4, i32 1} +!1 = metadata !{metadata !"branch_weights", i32 128, i32 1} +!2 = metadata !{metadata !"branch_weights", i32 1, i32 1}