Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -957,6 +957,7 @@ return Result; } + bool ComputeBestB = true; // Did we pick the triangle edge? If tail-duplication is profitable, do // that instead. Otherwise merge the triangle edge now while we know it is // optimal. @@ -978,10 +979,18 @@ Result.ShouldTailDup = true; return Result; } + // If any of BestA.Dest's successors is outside of the trellis, we should + // not use BestB as it may not be optimal. + for (MachineBasicBlock *Succ : BestA.Dest->successors()) + if (!Successors.count(Succ)) { + ComputeBestB = false; + break; + } } - // We have already computed the optimal edge for the other side of the - // trellis. - ComputedEdges[BestB.Src] = { BestB.Dest, false }; + if (ComputeBestB) + // We have already computed the optimal edge for the other side of the + // trellis. + ComputedEdges[BestB.Src] = { BestB.Dest, false }; auto TrellisSucc = BestA.Dest; DEBUG(BranchProbability SuccProb = getAdjustedProbability( Index: test/CodeGen/PowerPC/tail-dup-layout.ll =================================================================== --- test/CodeGen/PowerPC/tail-dup-layout.ll +++ test/CodeGen/PowerPC/tail-dup-layout.ll @@ -474,6 +474,52 @@ ret void } +; Verify that in the triangular trellis case, if there is a hot successor +; outside of the trellis, we should be able to connect them by not connecting +; the triangular edges.
+; CHECK-LABEL: trellis_no_triangle
+; CHECK: # %entry
+; CHECK: # %b
+; CHECK: # %d
+; CHECK: # %ret
+; CHECK: # %c
+; CHECK: # %e
+define void @trellis_no_triangle(i32 %tag) {
+entry:
+  br label %a
+a:
+  call void @a()
+  call void @a()
+  %tagbits.a = and i32 %tag, 3
+  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
+  br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
+b:
+  call void @b()
+  call void @b()
+  %tagbits.b = and i32 %tag, 12
+  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
+  br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
+d:
+  call void @d()
+  call void @d()
+  %tagbits.d = and i32 %tag, 48
+  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
+  br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
+c:
+  call void @c()
+  call void @c()
+  %tagbits.c = and i32 %tag, 12
+  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
+  br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
+e:
+  call void @e()
+  call void @e()
+  br label %ret
+ret:
+  call void @f()
+  ret void
+}
+
 declare void @a()
 declare void @b()
 declare void @c()
@@ -492,3 +538,6 @@
 !5 = !{!"branch_weights", i32 2, i32 8}
 !6 = !{!"branch_weights", i32 3, i32 4}
 !7 = !{!"branch_weights", i32 4, i32 2}
+!8 = !{!"branch_weights", i32 98, i32 2}
+!9 = !{!"branch_weights", i32 97, i32 1}
+!10 = !{!"branch_weights", i32 96, i32 2}