Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -1944,6 +1944,33 @@ if (ExitIt == LoopChain.end()) return; + // Rotating a loop exit to the bottom when there is a fallthrough to top + // trades the entry fallthrough for an exit fallthrough. + // If there is no bottom->top edge, but the chosen exit block does have + // a fallthrough, we break that fallthrough for nothing in return. + + // Let's consider an example. We have a built chain of basic blocks + // B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block. + // By doing a rotation we get + // Bk+1, ..., Bn, B1, ..., Bk + // Break of fallthrough to B1 is compensated by a fallthrough from Bk. + // If we had a fallthrough Bk -> Bk+1 it is borken now. + // It might be compensated by fallthrough Bn -> B1. + // So we have a condition to avoid creation of extra branch by loop rotation. + // All below must be true to avoid loop rotation: + // If there is a fallthrough to top (B1) + // There was fallthrough from chosen exit block (Bk) to next one (Bk+1) + // There is no fallthrough from bottom (Bn) to top (B1). + if (ViableTopFallthrough) { + MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); + if (NextBlockInChain->isPredecessor(ExitingBB)) + if (!Top->isPredecessor(Bottom)) + return; + } + + DEBUG(dbgs() << "Do loop rotation with new exit block " + << getBlockName(ExitingBB) << "\n"); std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -354,6 +354,7 @@ ; single-source GCC. ; CHECK-LABEL: unnatural_cfg2 ; CHECK: %entry +; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 ; CHECK: %loop.body4 @@ -361,7 +362,6 @@ ; CHECK: %loop.inner2.begin ; CHECK: %loop.body3 ; CHECK: %loop.inner1.begin -; CHECK: %loop.header ; CHECK: %bail entry: @@ -1491,6 +1491,54 @@ ret void } +define i32 @not_rotate_if_extra_branch(i32 %count) { +; Test checks that there is no loop rotation +; if it introduces extra branch. +; Specifically in this case because best exit is .header +; but it has fallthrough to .middle block and last block in +; loop chain .slow does not have afallthrough to .header. +; CHECK-LABEL: not_rotate_if_extra_branch +; CHECK: %.entry +; CHECK: %.header +; CHECK: %.middle +; CHECK: %.backedge +; CHECK: %.slow +; CHECK: %.bailout +; CHECK: %.stop +.entry: + %sum.0 = shl nsw i32 %count, 1 + br label %.header + +.header: + %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ] + %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ] + %is_exc = icmp sgt i32 %i, 9000000 + br i1 %is_exc, label %.bailout, label %.middle, !prof !13 + +.bailout: + %sum.2 = add nsw i32 %count, 1 + br label %.stop + +.middle: + %pr.1 = and i32 %i, 1023 + %pr.2 = icmp eq i32 %pr.1, 0 + br i1 %pr.2, label %.slow, label %.backedge, !prof !14 + +.slow: + tail call void @effect(i32 %sum) + br label %.backedge + +.backedge: + %sum.1 = add nsw i32 %i, %sum + %i.1 = add nsw i32 %i, 1 + %end = icmp slt i32 %i.1, %count + br i1 %end, label %.header, label %.stop, !prof !15 + +.stop: + %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ] + ret i32 %sum.phi +} + declare void @effect(i32) !5 = !{!"branch_weights", i32 84, i32 16} @@ -1501,3 +1549,6 @@ !10 = !{!"branch_weights", i32 90, i32 10} !11 = !{!"branch_weights", i32 1, i32 1} !12 = !{!"branch_weights", i32 5, i32 3} +!13 = !{!"branch_weights", i32 1, i32 1} +!14 = !{!"branch_weights", i32 1, i32 1023} +!15 = !{!"branch_weights", i32 4095, i32 1} Index: test/CodeGen/X86/code_placement_cold_loop_blocks.ll =================================================================== --- test/CodeGen/X86/code_placement_cold_loop_blocks.ll +++ test/CodeGen/X86/code_placement_cold_loop_blocks.ll @@ -37,7 +37,7 @@ ret void } -define void @nested_loop_0() !prof !1 { +define void @nested_loop_0(i1 %flag) !prof !1 { ; Test if a block that is cold in the inner loop but not cold in the outer loop ; will merged to the outer loop chain. ; @@ -68,8 +68,7 @@ if.else: call void @e() - %call2 = call zeroext i1 @a() - br i1 %call2, label %header2, label %header, !prof !3 + br i1 %flag, label %header2, label %header, !prof !3 end: call void @f()