Index: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp @@ -966,6 +966,16 @@ MachineBasicBlock * MachineBlockPlacement::findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + // Placing the latch block before the header may introduce an extra branch + // that skips this block the first time the loop is executed, which we want + // to avoid when optimising for size. + // FIXME: in theory there is a case that does not introduce a new branch, + // i.e. when the layout predecessor does not fall through to the loop header. + // In practice this never happens though: there always seems to be a preheader + // that can fall through and that is also placed before the header. + if (F->getFunction()->optForSize()) + return L.getHeader(); + // Check that the header hasn't been fused with a preheader block due to // crazy branches. If it has, we need to start with the header at the top to // prevent pulling the preheader into the loop body. Index: llvm/trunk/test/CodeGen/X86/loop-blocks.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/loop-blocks.ll +++ llvm/trunk/test/CodeGen/X86/loop-blocks.ll @@ -228,6 +228,41 @@ ret void } +; This is exactly the same function as slightly_more_involved. +; The difference is that when optimising for size, we do not want +; to see this reordering. 
+ +; CHECK-LABEL: slightly_more_involved_2: +; CHECK-NOT: jmp .LBB5_1 +; CHECK: .LBB5_1: +; CHECK-NEXT: callq body + +define void @slightly_more_involved_2() #0 { +entry: + br label %loop + +loop: + call void @body() + %t0 = call i32 @get() + %t1 = icmp slt i32 %t0, 2 + br i1 %t1, label %block_a, label %bb + +bb: + %t2 = call i32 @get() + %t3 = icmp slt i32 %t2, 99 + br i1 %t3, label %exit, label %loop + +block_a: + call void @bar99() + br label %loop + +exit: + call void @exit() + ret void +} + +attributes #0 = { minsize norecurse nounwind optsize readnone uwtable } + declare void @bar99() nounwind declare void @bar100() nounwind declare void @bar101() nounwind