Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -631,18 +631,46 @@ // BB->Succ. This is equivalent to looking the CFG backward with backward // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without // profile data). - + // -------------------------------------------------------------------------- + // Case 3: forked diamond + // S + // / \ + // / \ + // BB Pred + // | \ / | + // | \ / | + // | X | + // | / \ | + // | / \ | + // S1 S2 + // + // The current block is BB and edge BB->S1 is now being evaluated. + // As above S->BB was already selected because + // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2). + // + // topo-order: + // + // S-------| ---S + // | | | | + // ---BB | | BB + // | | | | + // | Pred----| | S1---- + // | | | | + // --(S1 or S2) ---Pred-- + // + // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2) + // + min(freq(Pred->S1), freq(Pred->S2)) + // Non-topo-order cost: + // In the worst case, S2 will not get laid out after Pred. + // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2). + // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2)) + // is 0. Then the non topo layout is better when + // freq(S->Pred) < freq(BB->S1). + // This is exactly what is checked below. + // Note there are other shapes that apply (Pred may not be a single block, + // but they all fit this general pattern.) BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB); - // Forward checking. For case 2, SuccProb will be 1. - if (SuccProb < HotProb) { - DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " " - << "Respecting topological ordering because " - << "probability is less than prob treshold: " - << SuccProb << "\n"); - return true; - } - // Make sure that a hot successor doesn't have a globally more // important predecessor. BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; @@ -653,11 +681,11 @@ (BlockFilter && !BlockFilter->count(Pred)) || BlockToChain[Pred] == &Chain) continue; - // Do backward checking. For case 1, it is actually redundant check. For - // case 2 above, we need a backward checking to filter out edges that are - // not 'strongly' biased. With profile data available, the check is mostly - // redundant too (when threshold prob is set at 50%) unless S has more than - // two successors. + // Do backward checking. + // For all cases above, we need a backward checking to filter out edges that + // are not 'strongly' biased. With profile data available, the check is + // mostly redundant for case 2 (when threshold prob is set at 50%) unless S + // has more than two successors. // BB Pred // \ / // Succ @@ -666,6 +694,8 @@ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) * // HotProb // i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb + // Case 1 is covered too, because the first equation reduces to: + // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle) BlockFrequency PredEdgeFreq = MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) { Index: test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll =================================================================== --- test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll +++ test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll @@ -13,7 +13,7 @@ if.end: ; preds = %entry %and.i.i.i = and i32 %int1, 4 %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0 - br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i + br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i, !prof !1 land.rhs.i: ; preds = %if.end %cmp.i.i.i = icmp eq i8* %str12, %str13 @@ -36,7 +36,7 @@ if.end5: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i ; CHECK: %if.end5 ; CHECK: tbz - br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19 + br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19, !prof !1 land.rhs.i19: ; preds = %if.end5 %cmp.i.i.i18 = icmp eq i8* %str6, %str7 @@ -69,3 +69,4 @@ } attributes #0 = { nounwind ssp } +!1 = !{!"branch_weights", i32 3, i32 5} Index: test/CodeGen/AArch64/compare-branch.ll =================================================================== --- test/CodeGen/AArch64/compare-branch.ll +++ test/CodeGen/AArch64/compare-branch.ll @@ -8,25 +8,25 @@ %val1 = load volatile i32, i32* @var32 %tst1 = icmp eq i32 %val1, 0 - br i1 %tst1, label %end, label %test2 + br i1 %tst1, label %end, label %test2, !prof !1 ; CHECK: cbz {{w[0-9]+}}, .LBB test2: %val2 = load volatile i32, i32* @var32 %tst2 = icmp ne i32 %val2, 0 - br i1 %tst2, label %end, label %test3 + br i1 %tst2, label %end, label %test3, !prof !1 ; CHECK: cbnz {{w[0-9]+}}, .LBB test3: %val3 = load volatile i64, i64* @var64 %tst3 = icmp eq i64 %val3, 0 - br i1 %tst3, label %end, label %test4 + br i1 %tst3, label %end, label %test4, !prof !1 ; CHECK: cbz {{x[0-9]+}}, .LBB test4: %val4 = load volatile i64, i64* @var64 %tst4 = icmp ne i64 %val4, 0 - br i1 %tst4, label %end, label %test5 + br i1 %tst4, label %end, label %test5, !prof !1 ; CHECK: cbnz {{x[0-9]+}}, .LBB test5: @@ -36,3 +36,6 @@ end: ret void } + + +!1 = !{!"branch_weights", i32 1, i32 1} Index: test/CodeGen/AArch64/logical_shifted_reg.ll =================================================================== --- test/CodeGen/AArch64/logical_shifted_reg.ll +++ test/CodeGen/AArch64/logical_shifted_reg.ll @@ -198,7 +198,7 @@ ; CHECK: b.gt .L %simple_and = and i64 %val1, %val2 %tst1 = icmp sgt i64 %simple_and, 0 - br i1 %tst1, label %ret, label %test2 + br i1 %tst1, label %ret, label %test2, !prof !1 test2: ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 @@ -206,7 +206,7 @@ %shifted_op = shl i64 %val2, 63 %shifted_and = and i64 %val1, %shifted_op %tst2 = icmp slt i64 %shifted_and, 0 - br i1 %tst2, label %ret, label %test3 + br i1 %tst2, label %ret, label %test3, !prof !1 test3: ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12 @@ -214,7 +214,7 @@ %asr_op = ashr i64 %val2, 12 %asr_and = and i64 %asr_op, %val1 %tst3 = icmp sgt i64 %asr_and, 0 - br i1 %tst3, label %ret, label %other_exit + br i1 %tst3, label %ret, label %other_exit, !prof !1 other_exit: store volatile i64 %val1, i64* @var1_64 @@ -222,3 +222,5 @@ ret: ret void } + +!1 = !{!"branch_weights", i32 1, i32 1} Index: test/CodeGen/SystemZ/tdc-06.ll =================================================================== --- test/CodeGen/SystemZ/tdc-06.ll +++ test/CodeGen/SystemZ/tdc-06.ll @@ -14,14 +14,14 @@ ; CHECK: ltdbr %f0, %f0 ; CHECK: je [[RET:.L.*]] %testeq = fcmp oeq double %x, 0.000000e+00 - br i1 %testeq, label %ret, label %nonzero + br i1 %testeq, label %ret, label %nonzero, !prof !1 nonzero: ; CHECK: lhi %r2, 1 ; CHECK: cdbr %f0, %f0 ; CHECK: jo [[RET]] %testnan = fcmp uno double %x, 0.000000e+00 - br i1 %testnan, label %ret, label %nonzeroord + br i1 %testnan, label %ret, label %nonzeroord, !prof !1 nonzeroord: ; CHECK: lhi %r2, 2 @@ -29,7 +29,7 @@ ; CHECK: jl [[RET]] %abs = tail call double @llvm.fabs.f64(double %x) %testinf = fcmp oeq double %abs, 0x7FF0000000000000 - br i1 %testinf, label %ret, label %finite + br i1 %testinf, label %ret, label %finite, !prof !1 finite: ; CHECK: lhi %r2, 3 @@ -46,3 +46,5 @@ %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ] ret i32 %res } + +!1 = !{!"branch_weights", i32 1, i32 1} Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -1283,6 +1283,174 @@ ret void } +declare void @a() +declare void @b() + +define void @test_forked_hot_diamond(i32* %a) { +; Test that a hot-branch with probability > 80% followed by a 50/50 branch +; will not place the cold predecessor if the probability for the fallthrough +; remains above 80% +; CHECK-LABEL: test_forked_hot_diamond +; CHECK: %entry +; CHECK: %then +; CHECK: %fork1 +; CHECK: %else +; CHECK: %fork2 +; CHECK: %exit +entry: + %gep1 = getelementptr i32, i32* %a, i32 1 + %val1 = load i32, i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %then, label %else, !prof !5 + +then: + call void @hot_function() + %gep2 = getelementptr i32, i32* %a, i32 2 + %val2 = load i32, i32* %gep2 + %cond2 = icmp ugt i32 %val2, 2 + br i1 %cond2, label %fork1, label %fork2, !prof !8 + +else: + call void @cold_function() + %gep3 = getelementptr i32, i32* %a, i32 3 + %val3 = load i32, i32* %gep3 + %cond3 = icmp ugt i32 %val3, 3 + br i1 %cond3, label %fork1, label %fork2, !prof !8 + +fork1: + call void @a() + br label %exit + +fork2: + call void @b() + br label %exit + +exit: + call void @hot_function() + ret void +} + +define void @test_forked_hot_diamond_gets_cold(i32* %a) { +; Test that a hot-branch with probability > 80% followed by a 50/50 branch +; will place the cold predecessor if the probability for the fallthrough +; falls below 80% +; The probability for both branches is 85%. For then2 vs else1 +; this results in a compounded probability of 83%. +; Neither then2->fork1 nor then2->fork2 has a large enough relative +; probability to break the CFG. +; Relative probs: +; then2 -> fork1 vs else1 -> fork1 = 71% +; then2 -> fork2 vs else2 -> fork2 = 74% +; CHECK-LABEL: test_forked_hot_diamond_gets_cold +; CHECK: %entry +; CHECK: %then1 +; CHECK: %then2 +; CHECK: %else1 +; CHECK: %fork1 +; CHECK: %else2 +; CHECK: %fork2 +; CHECK: %exit +entry: + %gep1 = getelementptr i32, i32* %a, i32 1 + %val1 = load i32, i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %then1, label %else1, !prof !9 + +then1: + call void @hot_function() + %gep2 = getelementptr i32, i32* %a, i32 2 + %val2 = load i32, i32* %gep2 + %cond2 = icmp ugt i32 %val2, 2 + br i1 %cond2, label %then2, label %else2, !prof !9 + +else1: + call void @cold_function() + br label %fork1 + +then2: + call void @hot_function() + %gep3 = getelementptr i32, i32* %a, i32 3 + %val3 = load i32, i32* %gep2 + %cond3 = icmp ugt i32 %val2, 3 + br i1 %cond3, label %fork1, label %fork2, !prof !8 + +else2: + call void @cold_function() + br label %fork2 + +fork1: + call void @a() + br label %exit + +fork2: + call void @b() + br label %exit + +exit: + call void @hot_function() + ret void +} + +define void @test_forked_hot_diamond_stays_hot(i32* %a) { +; Test that a hot-branch with probability > 88.88% (1:8) followed by a 50/50 +; branch will not place the cold predecessor as the probability for the +; fallthrough stays above 80% +; (1:8) followed by (1:1) is still (1:4) +; Here we use 90% probability because two in a row +; have a 89 % probability vs the original branch. +; CHECK-LABEL: test_forked_hot_diamond_stays_hot +; CHECK: %entry +; CHECK: %then1 +; CHECK: %then2 +; CHECK: %fork1 +; CHECK: %else1 +; CHECK: %else2 +; CHECK: %fork2 +; CHECK: %exit +entry: + %gep1 = getelementptr i32, i32* %a, i32 1 + %val1 = load i32, i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %then1, label %else1, !prof !10 + +then1: + call void @hot_function() + %gep2 = getelementptr i32, i32* %a, i32 2 + %val2 = load i32, i32* %gep2 + %cond2 = icmp ugt i32 %val2, 2 + br i1 %cond2, label %then2, label %else2, !prof !10 + +else1: + call void @cold_function() + br label %fork1 + +then2: + call void @hot_function() + %gep3 = getelementptr i32, i32* %a, i32 3 + %val3 = load i32, i32* %gep2 + %cond3 = icmp ugt i32 %val2, 3 + br i1 %cond3, label %fork1, label %fork2, !prof !8 + +else2: + call void @cold_function() + br label %fork2 + +fork1: + call void @a() + br label %exit + +fork2: + call void @b() + br label %exit + +exit: + call void @hot_function() + ret void +} + !5 = !{!"branch_weights", i32 84, i32 16} !6 = !{!"function_entry_count", i32 10} !7 = !{!"branch_weights", i32 60, i32 40} +!8 = !{!"branch_weights", i32 5001, i32 4999} +!9 = !{!"branch_weights", i32 85, i32 15} +!10 = !{!"branch_weights", i32 90, i32 10}