Index: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
+++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1917,6 +1917,12 @@
     return;

   MachineBasicBlock *Top = *LoopChain.begin();
+  MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
+
+  // If ExitingBB is already last in the chain, there is nothing to do.
+  if (Bottom == ExitingBB)
+    return;
+
   bool ViableTopFallthrough = false;
   for (MachineBasicBlock *Pred : Top->predecessors()) {
     BlockChain *PredChain = BlockToChain[Pred];
@@ -1931,7 +1937,6 @@
   // bottom is a viable exiting block. If so, bail out as rotating will
   // introduce an unnecessary branch.
   if (ViableTopFallthrough) {
-    MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
     for (MachineBasicBlock *Succ : Bottom->successors()) {
       BlockChain *SuccChain = BlockToChain[Succ];
       if (!LoopBlockSet.count(Succ) &&
@@ -1944,6 +1949,36 @@
   if (ExitIt == LoopChain.end())
     return;

+  // Rotating a loop exit to the bottom when there is a fallthrough to the top
+  // trades the entry fallthrough for an exit fallthrough.
+  // If there is no bottom->top edge, but the chosen exit block does have
+  // a fallthrough, we break that fallthrough for nothing in return.
+
+  // Consider an example. We have built a chain of basic blocks
+  // B1, B2, ..., Bn, where Bk is ExitingBB, the chosen exit block.
+  // After rotation the chain becomes
+  // Bk+1, ..., Bn, B1, ..., Bk
+  // Breaking the fallthrough into B1 is compensated by a fallthrough from Bk.
+  // If there was a fallthrough Bk -> Bk+1, it is now broken.
+  // It might be compensated by a fallthrough Bn -> B1.
+  // This gives a condition under which rotation would create an extra branch.
+  // All of the following must hold to skip the rotation:
+  //   There is a fallthrough to the top (B1).
+  //   There was a fallthrough from the exit block (Bk) to the next one (Bk+1).
+  //   There is no fallthrough from the bottom (Bn) to the top (B1).
+  // Note that there is no exit fallthrough from Bn because we checked for
+  // that above.
+  if (ViableTopFallthrough) {
+    assert(std::next(ExitIt) != LoopChain.end() &&
+           "Exit should not be last BB");
+    MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
+    if (ExitingBB->isSuccessor(NextBlockInChain))
+      if (!Bottom->isSuccessor(Top))
+        return;
+  }
+
+  DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
+               << " at bottom\n");
+
   std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
 }

Index: llvm/trunk/test/CodeGen/X86/block-placement.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/block-placement.ll
+++ llvm/trunk/test/CodeGen/X86/block-placement.ll
@@ -354,6 +354,7 @@
 ; single-source GCC.
 ; CHECK-LABEL: unnatural_cfg2
 ; CHECK: %entry
+; CHECK: %loop.header
 ; CHECK: %loop.body1
 ; CHECK: %loop.body2
 ; CHECK: %loop.body4
@@ -361,7 +362,6 @@
 ; CHECK: %loop.inner2.begin
 ; CHECK: %loop.body3
 ; CHECK: %loop.inner1.begin
-; CHECK: %loop.header
 ; CHECK: %bail

 entry:
@@ -1491,6 +1491,102 @@
   ret void
 }

+define i32 @not_rotate_if_extra_branch(i32 %count) {
+; This test checks that there is no loop rotation
+; if it would introduce an extra branch.
+; Specifically, the best exit here is .header,
+; but it has a fallthrough to the .middle block, and the last block
+; in the loop chain, .slow, does not have a fallthrough to .header.
+; CHECK-LABEL: not_rotate_if_extra_branch
+; CHECK: %.entry
+; CHECK: %.header
+; CHECK: %.middle
+; CHECK: %.backedge
+; CHECK: %.slow
+; CHECK: %.bailout
+; CHECK: %.stop
+.entry:
+  %sum.0 = shl nsw i32 %count, 1
+  br label %.header
+
+.header:
+  %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ]
+  %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ]
+  %is_exc = icmp sgt i32 %i, 9000000
+  br i1 %is_exc, label %.bailout, label %.middle, !prof !13
+
+.bailout:
+  %sum.2 = add nsw i32 %count, 1
+  br label %.stop
+
+.middle:
+  %pr.1 = and i32 %i, 1023
+  %pr.2 = icmp eq i32 %pr.1, 0
+  br i1 %pr.2, label %.slow, label %.backedge, !prof !14
+
+.slow:
+  tail call void @effect(i32 %sum)
+  br label %.backedge
+
+.backedge:
+  %sum.1 = add nsw i32 %i, %sum
+  %i.1 = add nsw i32 %i, 1
+  %end = icmp slt i32 %i.1, %count
+  br i1 %end, label %.header, label %.stop, !prof !15
+
+.stop:
+  %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ]
+  ret i32 %sum.phi
+}
+
+define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
+; This is a regression test for the patch that avoids loop rotation
+; if it would introduce an extra branch.
+; CHECK-LABEL: not_rotate_if_extra_branch_regression
+; CHECK: %.entry
+; CHECK: %.first_backedge
+; CHECK: %.slow
+; CHECK: %.second_header
+.entry:
+  %sum.0 = shl nsw i32 %count, 1
+  br label %.first_header
+
+.first_header:
+  %i = phi i32 [ %i.1, %.first_backedge ], [ 0, %.entry ]
+  %is_bo1 = icmp sgt i32 %i, 9000000
+  br i1 %is_bo1, label %.bailout, label %.first_backedge, !prof !14
+
+.first_backedge:
+  %i.1 = add nsw i32 %i, 1
+  %end = icmp slt i32 %i.1, %count
+  br i1 %end, label %.first_header, label %.second_header, !prof !13
+
+.second_header:
+  %j = phi i32 [ %j.1, %.second_backedge ], [ %init, %.first_backedge ]
+  %end.2 = icmp sgt i32 %j, %count
+  br i1 %end.2, label %.stop, label %.second_middle, !prof !14
+
+.second_middle:
+  %is_slow = icmp sgt i32 %j, 9000000
+  br i1 %is_slow, label %.slow, label %.second_backedge, !prof !14
+
+.slow:
+  tail call void @effect(i32 %j)
+  br label %.second_backedge
+
+.second_backedge:
+  %j.1 = add nsw i32 %j, 1
+  %end.3 = icmp slt i32 %j, 10000000
+  br i1 %end.3, label %.second_header, label %.stop, !prof !13
+
+.stop:
+  %res = add nsw i32 %j, %i.1
+  ret i32 %res
+
+.bailout:
+  ret i32 0
+}
+
 declare void @effect(i32)

 !5 = !{!"branch_weights", i32 84, i32 16}
@@ -1501,3 +1597,6 @@
 !10 = !{!"branch_weights", i32 90, i32 10}
 !11 = !{!"branch_weights", i32 1, i32 1}
 !12 = !{!"branch_weights", i32 5, i32 3}
+!13 = !{!"branch_weights", i32 1, i32 1}
+!14 = !{!"branch_weights", i32 1, i32 1023}
+!15 = !{!"branch_weights", i32 4095, i32 1}
Index: llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
+++ llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
@@ -37,7 +37,7 @@
   ret void
 }

-define void @nested_loop_0() !prof !1 {
+define void @nested_loop_0(i1 %flag) !prof !1 {
 ; Test if a block that is cold in the inner loop but not cold in the outer loop
 ; will merged to the outer loop chain.
 ;
@@ -68,8 +68,7 @@

 if.else:
   call void @e()
-  %call2 = call zeroext i1 @a()
-  br i1 %call2, label %header2, label %header, !prof !3
+  br i1 %flag, label %header2, label %header, !prof !3

 end:
   call void @f()
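For illustration only: a minimal, self-contained C++ sketch of the bail-out condition the new comment describes. The SimpleBlock type, the shouldRotate helper, and the block IDs below are hypothetical stand-ins and are not part of LLVM's MachineBlockPlacement API; the sketch only mirrors the reasoning about the chain B1, ..., Bn and the chosen exit Bk.

#include <cassert>
#include <cstddef>
#include <set>
#include <vector>

// Hypothetical stand-in for MachineBasicBlock: an ID plus a successor set.
struct SimpleBlock {
  int ID;
  std::set<int> Succs;
  bool isSuccessor(const SimpleBlock &Other) const {
    return Succs.count(Other.ID) != 0;
  }
};

// Chain models B1, ..., Bn; ExitIdx is the position of the chosen exit Bk.
// Returns false when rotating Bk to the bottom would only add a branch.
bool shouldRotate(const std::vector<SimpleBlock> &Chain, std::size_t ExitIdx,
                  bool ViableTopFallthrough) {
  assert(!Chain.empty() && ExitIdx < Chain.size() && "bad chain/exit index");
  const SimpleBlock &Top = Chain.front();
  const SimpleBlock &Bottom = Chain.back();

  // If the exit block is already last in the chain, rotation is a no-op.
  if (ExitIdx + 1 == Chain.size())
    return false;

  // The patch's bail-out: a fallthrough into the top that rotation would
  // break, a chain-adjacent edge Bk -> Bk+1 that would also break, and no
  // Bn -> B1 edge to compensate -- rotating would cost one extra branch.
  if (ViableTopFallthrough) {
    const SimpleBlock &NextInChain = Chain[ExitIdx + 1];
    if (Chain[ExitIdx].isSuccessor(NextInChain) && !Bottom.isSuccessor(Top))
      return false;
  }
  return true;
}

int main() {
  // Chain B1 -> B2 -> B3, with the chosen exit being B1 (ExitIdx == 0).
  // B1 has an edge to B2 inside the chain, B3 has no edge back to B1, and
  // block 100 stands in for the exit target outside the loop.
  std::vector<SimpleBlock> Chain = {{1, {2, 100}}, {2, {3}}, {3, {100}}};
  bool Rotate =
      shouldRotate(Chain, /*ExitIdx=*/0, /*ViableTopFallthrough=*/true);
  // Expect no rotation here, mirroring the not_rotate_if_extra_branch test.
  return Rotate ? 1 : 0;
}

With this example chain, the exit block is the top, it would fall through to the next block in the chain, and the bottom block has no edge back to the top, so shouldRotate returns false; this is the same shape of CFG the not_rotate_if_extra_branch test above exercises.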