Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -125,6 +125,7 @@ cl::init(true), cl::Hidden); extern cl::opt StaticLikelyProb; +extern cl::opt ProfileLikelyProb; namespace { class BlockChain; @@ -520,12 +521,10 @@ return false; } -// FIXME (PGO handling) -// For now this method just returns a fixed threshold. It needs to be enhanced -// such that BB and Succ is passed in so that CFG shapes are examined such that -// the threshold is computed with more precise cost model when PGO is on. -static BranchProbability getLayoutSuccessorProbThreshold() { - BranchProbability HotProb(StaticLikelyProb, 100); +// Returns the hotness threshold with and without profile. +static BranchProbability getLayoutSuccessorProbThreshold(bool HasProfile) { + BranchProbability HotProb( + HasProfile ? ProfileLikelyProb : StaticLikelyProb, 100); return HotProb; } @@ -553,15 +552,16 @@ // set Succ as the layout successor of BB. Picking Succ as BB's // successor breaks the CFG constraints. With this layout, Pred BB // is forced to be outlined, so the overall cost will be cost of the - // branch taken from BB to Pred, plus the cost of back taken branch - // from Pred to Succ, as well as the additional cost asssociated - // with the needed unconditional jump instruction from Pred To Succ. - // The cost of the topological order layout is the taken branch cost - // from BB to Succ, so to make BB->Succ a viable candidate, the following - // must hold: - // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost - // < freq(BB->Succ) * taken_branch_cost. - // Ignoring unconditional jump cost, we get + // taken conditonal branch from BB to Pred, plus the cost of unconditional + // branch from Pred to Succ. + // The cost of the topological order layout is the cost of the taken + // conditional branch from BB to Succ. + // So to make BB->Succ a viable candidate, the following must hold: + // freq(BB->pred) * taken_conditonal_branch_cost + // + freq(Pred->Succ) * unconditional_branch_cost + // < freq(BB->Succ) * taken_branch_cost + // Assuming Pred has only one predecessor, and taken_conditional_branch_cost + // equals unconditional_branch_cost we get // freq(BB->Succ) > 2 * freq(BB->Pred), i.e., // prob(BB->Succ) > 2 * prob(BB->Pred) // @@ -593,7 +593,8 @@ // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without // profile data). - BranchProbability HotProb = getLayoutSuccessorProbThreshold(); + BranchProbability HotProb = getLayoutSuccessorProbThreshold( + BB->getParent()->getFunction()->getEntryCount().hasValue()); // Forward checking. For case 2, SuccProb will be 1. if (SuccProb < HotProb) { @@ -612,11 +613,19 @@ (BlockFilter && !BlockFilter->count(Pred)) || BlockToChain[Pred] == &Chain) continue; - // Do backward checking. For case 1, it is actually redundant check. For - // case 2 above, we need a backward checking to filter out edges that are - // not 'strongly' biased. With profile data available, the check is mostly - // redundant too (when threshold prob is set at 50%) unless S has more than - // two successors. + + BlockFrequency PredEdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); + // For case 1, Pred is successor of BB. The frequency of BB->Pred should + // be added to PredEdgeFrequency to compare with CandidateEdgeFreq. + if (BB->isSuccessor(Pred)) + PredEdgeFreq += + MBFI->getBlockFreq(BB) * MBPI->getEdgeProbability(BB, Pred); + + // Do backward checking. For both case 1 and 2 above, we need a backward + // checking to filter out edges that are not 'strongly' biased. With + // profile data available, the check is mostly redundant too (when + // threshold prob is set at 50%) unless S has more than two successors. // BB Pred // \ / // Succ @@ -625,8 +634,6 @@ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) * // HotProb // i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb - BlockFrequency PredEdgeFreq = - MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) { BadCFGConflict = true; break; Index: lib/CodeGen/MachineBranchProbabilityInfo.cpp =================================================================== --- lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -29,6 +29,12 @@ cl::desc("branch probability threshold to be considered very likely"), cl::init(80), cl::Hidden); +cl::opt ProfileLikelyProb( + "profile-likely-prob", + cl::desc("branch probability threshold to be considered very likely " + "when profile is available"), + cl::init(51), cl::Hidden); + char MachineBranchProbabilityInfo::ID = 0; void MachineBranchProbabilityInfo::anchor() {} Index: test/CodeGen/AArch64/fast-isel-branch-cond-split.ll =================================================================== --- test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -85,6 +85,6 @@ declare i64 @bar() !0 = !{!"branch_weights", i32 5128, i32 32} -!1 = !{!"branch_weights", i32 1024, i32 4136} +!1 = !{!"branch_weights", i32 1024, i32 9136} !2 = !{} Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -1207,4 +1207,76 @@ ret void } +define void @test_hot_branch_triangle(i32* %a) { +; Test that a hot branch that has a probability a between 80% and 90% will +; not break triangle shaped CFG constrains when doing block placement. +; CHECK-LABEL: test_hot_branch_triangle: +; CHECK: %entry +; CHECK: %then +; CHECK: %exit + +entry: + %gep1 = getelementptr i32, i32* %a, i32 1 + %val1 = load i32, i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %exit, label %then, !prof !5 + +then: + call void @hot_function() + br label %exit + +exit: + call void @hot_function() + ret void +} + +define void @test_hot_branch_triangle_reverse(i32* %a) { +; Test that a hot branch that has a probability a little larger than 90% will +; break triangle shaped CFG constrains when doing block placement. +; CHECK-LABEL: test_hot_branch_triangle_reverse: +; CHECK: %entry +; CHECK: %exit +; CHECK: %then + +entry: + %gep1 = getelementptr i32, i32* %a, i32 1 + %val1 = load i32, i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %exit, label %then, !prof !6 + +then: + call void @hot_function() + br label %exit + +exit: + call void @hot_function() + ret void +} + +define void @test_hot_branch_triangle_profile(i32* %a) !prof !7 { +; Test that a hot branch that has a probability a little larger than 80% will +; break triangle shaped CFG constrains when doing block placement if profile +; is present. +; CHECK-LABEL: test_hot_branch_triangle_profile: +; CHECK: %entry +; CHECK: %exit +; CHECK: %then + +entry: + %gep1 = getelementptr i32, i32* %a, i32 1 + %val1 = load i32, i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %exit, label %then, !prof !5 + +then: + call void @hot_function() + br label %exit + +exit: + call void @hot_function() + ret void +} + !5 = !{!"branch_weights", i32 84, i32 16} +!6 = !{!"branch_weights", i32 100, i32 10} +!7 = !{!"function_entry_count", i32 10}