Index: lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- lib/CodeGen/MachineBlockPlacement.cpp
+++ lib/CodeGen/MachineBlockPlacement.cpp
@@ -37,6 +37,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -67,6 +68,12 @@
              "over the original exit to be considered the new exit."),
     cl::init(0), cl::Hidden);
 
+static cl::opt<bool> OutlineOptionalBranches(
+    "outline-optional-branches",
+    cl::desc("Put completely optional branches, i.e. branches with a common "
+             "post dominator, out of line."),
+    cl::init(false), cl::Hidden);
+
 namespace {
 class BlockChain;
 /// \brief Type for our function-wide basic block -> block chain mapping.
@@ -188,6 +195,9 @@
   /// \brief A handle to the target's lowering info.
   const TargetLoweringBase *TLI;
 
+  /// \brief A handle to the post dominator tree.
+  const MachinePostDominatorTree *MPDT;
+
   /// \brief Allocator and owner of BlockChain structures.
   ///
   /// We build BlockChains lazily while processing the loop structure of
@@ -244,6 +254,7 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addRequired<MachinePostDominatorTree>();
     AU.addRequired<MachineLoopInfo>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -256,6 +267,7 @@
                       "Branch Probability Basic Block Placement", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
                     "Branch Probability Basic Block Placement", false, false)
@@ -363,6 +375,14 @@
     uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
     BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
 
+    // If we outline optional branches, look whether Succ post dominates all
+    // successors. Don't do this if Succ is cold, i.e. if one of the optional
+    // branches is hot.
+    if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
+        MPDT->dominates(Succ, BB)) {
+      return Succ;
+    }
+
     // Only consider successors which are either "hot", or wouldn't violate
     // any CFG constraints.
     if (SuccChain.LoopPredecessors != 0) {
@@ -1110,6 +1130,7 @@
   MLI = &getAnalysis<MachineLoopInfo>();
   TII = F.getSubtarget().getInstrInfo();
   TLI = F.getSubtarget().getTargetLowering();
+  MPDT = &getAnalysis<MachinePostDominatorTree>();
   assert(BlockToChain.empty());
 
   buildCFGChains(F);
Index: test/CodeGen/X86/code_placement_outline_optional_branches.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/code_placement_outline_optional_branches.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -outline-optional-branches < %s | FileCheck %s -check-prefix=CHECK-OUTLINE
+
+define void @foo(i32 %t1, i32 %t2) {
+; Test that -outline-optional-branches moves the optional call to 'a' out of
+; line (after 'd'), while the hot optional call to 'c' stays in line.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq a
+; CHECK: callq b
+; CHECK: callq c
+; CHECK: callq d
+;
+; CHECK-OUTLINE-LABEL: foo:
+; CHECK-OUTLINE: callq b
+; CHECK-OUTLINE: callq c
+; CHECK-OUTLINE: callq d
+; CHECK-OUTLINE: callq a
+
+entry:
+  %cmp = icmp eq i32 %t1, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  call void @a()
+  br label %if.end
+
+if.end:
+  call void @b()
+  br label %hotbranch
+
+hotbranch:
+  %cmp2 = icmp eq i32 %t2, 0
+  br i1 %cmp2, label %if.then2, label %if.end2, !prof !1
+
+if.then2:
+  call void @c()
+  br label %if.end2
+
+if.end2:
+  call void @d()
+  ret void
+}
+
+declare void @a()
+declare void @b()
+declare void @c()
+declare void @d()
+
+!1 = !{!"branch_weights", i32 64, i32 4}