Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -1944,6 +1944,22 @@ if (ExitIt == LoopChain.end()) return; + // If the header has viable fallthrough and our exit candidate is + // a predecessor of next element in built loop chain then rotation + // will introduce an unnecessary branch because before we had a fallthrough + // from exit candidate but now we will have a branch. This branch will not be + // extra one only if the last block in a chain is a predecessor of first + // element in chain, because then rotation will eliminate one branch. + if (ViableTopFallthrough) { + MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); + if (NextBlockInChain->isPredecessor(ExitingBB)) + if (!Top->isPredecessor(Bottom)) + return; + } + + DEBUG(dbgs() << "Do loop rotation with new exit block " + << getBlockName(ExitingBB) << "\n"); std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -354,6 +354,7 @@ ; single-source GCC. ; CHECK-LABEL: unnatural_cfg2 ; CHECK: %entry +; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 ; CHECK: %loop.body4 @@ -361,7 +362,6 @@ ; CHECK: %loop.inner2.begin ; CHECK: %loop.body3 ; CHECK: %loop.inner1.begin -; CHECK: %loop.header ; CHECK: %bail entry: @@ -1491,6 +1491,54 @@ ret void } +define i32 @not_rotate_if_extra_branch(i32 %count) { +; Test checks that there is no loop rotation +; if it introduces extra branch. +; Specifically in this case because best exit is .header +; but it has fallthrough to .middle block and last block in +; loop chain .slow does not have afallthrough to .header. +; CHECK-LABEL: not_rotate_if_extra_branch +; CHECK: %.entry +; CHECK: %.header +; CHECK: %.middle +; CHECK: %.backedge +; CHECK: %.slow +; CHECK: %.bailout +; CHECK: %.stop +.entry: + %sum.0 = shl nsw i32 %count, 1 + br label %.header + +.header: + %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ] + %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ] + %is_exc = icmp sgt i32 %i, 9000000 + br i1 %is_exc, label %.bailout, label %.middle, !prof !13 + +.bailout: + %sum.2 = add nsw i32 %count, 1 + br label %.stop + +.middle: + %pr.1 = and i32 %i, 1023 + %pr.2 = icmp eq i32 %pr.1, 0 + br i1 %pr.2, label %.slow, label %.backedge, !prof !14 + +.slow: + tail call void @effect(i32 %sum) + br label %.backedge + +.backedge: + %sum.1 = add nsw i32 %i, %sum + %i.1 = add nsw i32 %i, 1 + %end = icmp slt i32 %i.1, %count + br i1 %end, label %.header, label %.stop, !prof !15 + +.stop: + %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ] + ret i32 %sum.phi +} + declare void @effect(i32) !5 = !{!"branch_weights", i32 84, i32 16} @@ -1501,3 +1549,6 @@ !10 = !{!"branch_weights", i32 90, i32 10} !11 = !{!"branch_weights", i32 1, i32 1} !12 = !{!"branch_weights", i32 5, i32 3} +!13 = !{!"branch_weights", i32 1, i32 1} +!14 = !{!"branch_weights", i32 1, i32 1023} +!15 = !{!"branch_weights", i32 4095, i32 1} Index: test/CodeGen/X86/code_placement_cold_loop_blocks.ll =================================================================== --- test/CodeGen/X86/code_placement_cold_loop_blocks.ll +++ test/CodeGen/X86/code_placement_cold_loop_blocks.ll @@ -37,7 +37,7 @@ ret void } -define void @nested_loop_0() !prof !1 { +define void @nested_loop_0(i1 %flag) !prof !1 { ; Test if a block that is cold in the inner loop but not cold in the outer loop ; will merged to the outer loop chain. ; @@ -68,8 +68,7 @@ if.else: call void @e() - %call2 = call zeroext i1 @a() - br i1 %call2, label %header2, label %header, !prof !3 + br i1 %flag, label %header2, label %header, !prof !3 end: call void @f()