Index: lib/CodeGen/MachineSink.cpp =================================================================== --- lib/CodeGen/MachineSink.cpp +++ lib/CodeGen/MachineSink.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -74,6 +75,7 @@ MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; const MachineBlockFrequencyInfo *MBFI; + const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; // Remember which edges have been considered for breaking. @@ -105,6 +107,7 @@ AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachinePostDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); @@ -283,6 +286,7 @@ PDT = &getAnalysis<MachinePostDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); bool EverMadeChange = false; @@ -383,6 +387,9 @@ if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI)) return true; + if (MBPI->getEdgeProbability(From, To) < BranchProbability(50, 100)) + return true; + // MI is cheap, we probably don't want to break the critical edge for it. // However, if this would allow some definitions of its source operands // to be sunk then it's probably worth it. 
Index: test/CodeGen/ARM/atomic-cmpxchg.ll =================================================================== --- test/CodeGen/ARM/atomic-cmpxchg.ll +++ test/CodeGen/ARM/atomic-cmpxchg.ll @@ -38,16 +38,14 @@ ; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 ; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]: ; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-ARMV6-NEXT: mov [[RES:r[0-9]+]], #0 ; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]] -; CHECK-ARMV6-NEXT: bne [[END:.LBB[0-9_]+]] +; CHECK-ARMV6-NEXT: movne [[RES:r[0-9]+]], #0 +; CHECK-ARMV6-NEXT: bxne lr ; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] -; CHECK-ARMV6-NEXT: mov [[RES]], #1 ; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV6-NEXT: bne [[TRY]] -; CHECK-ARMV6-NEXT: [[END]]: -; CHECK-ARMV6-NEXT: mov r0, [[RES]] -; CHECK-ARMV6-NEXT: bx lr +; CHECK-ARMV6-NEXT: moveq [[RES]], #1 +; CHECK-ARMV6-NEXT: bxeq lr +; CHECK-ARMV6-NEXT: b [[TRY]] ; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8: ; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1 @@ -64,20 +62,18 @@ ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: ; CHECK-ARMV7-NEXT: .fnstart ; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 -; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]: -; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]] -; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]] +; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]] +; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]: ; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] -; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1 ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV7-NEXT: bne [[TRY]] -; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]] -; CHECK-ARMV7-NEXT: [[FAIL]]: +; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1 +; CHECK-ARMV7-NEXT: bxeq lr +; CHECK-ARMV7-NEXT: [[TRY]]: +; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] +; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]] +; CHECK-ARMV7-NEXT: beq [[HEAD]] ; CHECK-ARMV7-NEXT: clrex ; CHECK-ARMV7-NEXT: mov [[RES]], #0 -; CHECK-ARMV7-NEXT: [[END]]: -; 
CHECK-ARMV7-NEXT: mov r0, [[RES]] ; CHECK-ARMV7-NEXT: bx lr ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: Index: test/CodeGen/ARM/code-placement.ll =================================================================== --- test/CodeGen/ARM/code-placement.ll +++ test/CodeGen/ARM/code-placement.ll @@ -12,9 +12,9 @@ br i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_2: -; CHECK: bne LBB0_2 -; CHECK-NOT: b LBB0_2 +; CHECK: LBB0_1: +; CHECK: bne LBB0_1 +; CHECK-NOT: b LBB0_1 ; CHECK: bx lr %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] @@ -34,14 +34,13 @@ define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly { entry: ; CHECK-LABEL: t2: -; CHECK: beq LBB1_[[RET:.]] %0 = icmp eq i32 %passes, 0 ; [#uses=1] br i1 %0, label %bb5, label %bb.nph15 -; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader bb1: ; preds = %bb2.preheader, %bb1 -; CHECK: LBB1_[[BB1:.]]: @ %bb1 -; CHECK: bne LBB1_[[BB1]] +; CHECK: LBB1_[[BB3:.]]: @ %bb3 +; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader +; CHECK: blt LBB1_[[BB3]] %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; [#uses=2] %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; [#uses=1] %tmp17 = sub i32 %i.07, %indvar ; [#uses=1] @@ -53,9 +52,9 @@ br i1 %exitcond, label %bb3, label %bb1 bb3: ; preds = %bb1, %bb2.preheader -; CHECK: LBB1_[[BB3:.]]: @ %bb3 -; CHECK: bne LBB1_[[PREHDR]] -; CHECK-NOT: b LBB1_ +; CHECK: LBB1_[[BB1:.]]: @ %bb1 +; CHECK: bne LBB1_[[BB1]] +; CHECK: b LBB1_[[BB3]] %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; [#uses=2] %3 = add i32 %pass.011, 1 ; [#uses=2] %exitcond18 = icmp eq i32 %3, %passes ; [#uses=1] @@ -71,8 +70,6 @@ %sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; [#uses=2] br i1 %4, label %bb1, label %bb3 -; CHECK: LBB1_[[RET]]: @ %bb5 -; CHECK: pop bb5: ; preds = %bb3, %entry %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; 
[#uses=1] ret i32 %sum.1.lcssa Index: test/CodeGen/X86/clz.ll =================================================================== --- test/CodeGen/X86/clz.ll +++ test/CodeGen/X86/clz.ll @@ -279,28 +279,32 @@ define i8 @ctlz_i8_zero_test(i8 %n) { ; X32-LABEL: ctlz_i8_zero_test: ; X32: # BB#0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl -; X32-NEXT: movb $8, %al -; X32-NEXT: testb %cl, %cl -; X32-NEXT: je .LBB8_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: movzbl %cl, %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: testb %al, %al +; X32-NEXT: je .LBB8_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $7, %eax -; X32-NEXT: .LBB8_2: # %cond.end +; X32-NEXT: # kill: %AL %AL %EAX +; X32-NEXT: retl +; X32-NEXT: .LBB8_1: +; X32-NEXT: movb $8, %al ; X32-NEXT: # kill: %AL %AL %EAX ; X32-NEXT: retl ; ; X64-LABEL: ctlz_i8_zero_test: ; X64: # BB#0: -; X64-NEXT: movb $8, %al ; X64-NEXT: testb %dil, %dil -; X64-NEXT: je .LBB8_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB8_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: bsrl %eax, %eax ; X64-NEXT: xorl $7, %eax -; X64-NEXT: .LBB8_2: # %cond.end +; X64-NEXT: # kill: %AL %AL %EAX +; X64-NEXT: retq +; X64-NEXT: .LBB8_1: +; X64-NEXT: movb $8, %al ; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; @@ -327,26 +331,30 @@ define i16 @ctlz_i16_zero_test(i16 %n) { ; X32-LABEL: ctlz_i16_zero_test: ; X32: # BB#0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movw $16, %ax -; X32-NEXT: testw %cx, %cx -; X32-NEXT: je .LBB9_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: bsrw %cx, %ax +; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: testw %ax, %ax +; X32-NEXT: je .LBB9_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: bsrw %ax, %ax ; X32-NEXT: xorl $15, %eax -; X32-NEXT: .LBB9_2: # %cond.end +; X32-NEXT: # kill: %AX %AX %EAX +; X32-NEXT: retl +; X32-NEXT: .LBB9_1: +; X32-NEXT: movw $16, %ax ; X32-NEXT: # kill: 
%AX %AX %EAX ; X32-NEXT: retl ; ; X64-LABEL: ctlz_i16_zero_test: ; X64: # BB#0: -; X64-NEXT: movw $16, %ax ; X64-NEXT: testw %di, %di -; X64-NEXT: je .LBB9_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB9_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: bsrw %di, %ax ; X64-NEXT: xorl $15, %eax -; X64-NEXT: .LBB9_2: # %cond.end +; X64-NEXT: # kill: %AX %AX %EAX +; X64-NEXT: retq +; X64-NEXT: .LBB9_1: +; X64-NEXT: movw $16, %ax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ; @@ -367,25 +375,27 @@ define i32 @ctlz_i32_zero_test(i32 %n) { ; X32-LABEL: ctlz_i32_zero_test: ; X32: # BB#0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32, %eax -; X32-NEXT: testl %ecx, %ecx -; X32-NEXT: je .LBB10_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: bsrl %ecx, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: testl %eax, %eax +; X32-NEXT: je .LBB10_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $31, %eax -; X32-NEXT: .LBB10_2: # %cond.end +; X32-NEXT: retl +; X32-NEXT: .LBB10_1: +; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; ; X64-LABEL: ctlz_i32_zero_test: ; X64: # BB#0: -; X64-NEXT: movl $32, %eax ; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB10_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB10_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: xorl $31, %eax -; X64-NEXT: .LBB10_2: # %cond.end +; X64-NEXT: retq +; X64-NEXT: .LBB10_1: +; X64-NEXT: movl $32, %eax ; X64-NEXT: retq ; ; X32-CLZ-LABEL: ctlz_i32_zero_test: @@ -464,26 +474,30 @@ define i8 @cttz_i8_zero_test(i8 %n) { ; X32-LABEL: cttz_i8_zero_test: ; X32: # BB#0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl -; X32-NEXT: movb $8, %al -; X32-NEXT: testb %cl, %cl -; X32-NEXT: je .LBB12_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: movzbl %cl, %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: testb %al, %al +; X32-NEXT: je .LBB12_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: movzbl %al, %eax ; 
X32-NEXT: bsfl %eax, %eax -; X32-NEXT: .LBB12_2: # %cond.end +; X32-NEXT: # kill: %AL %AL %EAX +; X32-NEXT: retl +; X32-NEXT: .LBB12_1: +; X32-NEXT: movb $8, %al ; X32-NEXT: # kill: %AL %AL %EAX ; X32-NEXT: retl ; ; X64-LABEL: cttz_i8_zero_test: ; X64: # BB#0: -; X64-NEXT: movb $8, %al ; X64-NEXT: testb %dil, %dil -; X64-NEXT: je .LBB12_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB12_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: bsfl %eax, %eax -; X64-NEXT: .LBB12_2: # %cond.end +; X64-NEXT: # kill: %AL %AL %EAX +; X64-NEXT: retq +; X64-NEXT: .LBB12_1: +; X64-NEXT: movb $8, %al ; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; @@ -510,23 +524,25 @@ define i16 @cttz_i16_zero_test(i16 %n) { ; X32-LABEL: cttz_i16_zero_test: ; X32: # BB#0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: testw %ax, %ax +; X32-NEXT: je .LBB13_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: bsfw %ax, %ax +; X32-NEXT: retl +; X32-NEXT: .LBB13_1: ; X32-NEXT: movw $16, %ax -; X32-NEXT: testw %cx, %cx -; X32-NEXT: je .LBB13_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: bsfw %cx, %ax -; X32-NEXT: .LBB13_2: # %cond.end ; X32-NEXT: retl ; ; X64-LABEL: cttz_i16_zero_test: ; X64: # BB#0: -; X64-NEXT: movw $16, %ax ; X64-NEXT: testw %di, %di -; X64-NEXT: je .LBB13_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB13_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: bsfw %di, %ax -; X64-NEXT: .LBB13_2: # %cond.end +; X64-NEXT: retq +; X64-NEXT: .LBB13_1: +; X64-NEXT: movw $16, %ax ; X64-NEXT: retq ; ; X32-CLZ-LABEL: cttz_i16_zero_test: @@ -546,23 +562,25 @@ define i32 @cttz_i32_zero_test(i32 %n) { ; X32-LABEL: cttz_i32_zero_test: ; X32: # BB#0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: testl %eax, %eax +; X32-NEXT: je .LBB14_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: bsfl %eax, %eax +; X32-NEXT: retl +; X32-NEXT: .LBB14_1: ; X32-NEXT: 
movl $32, %eax -; X32-NEXT: testl %ecx, %ecx -; X32-NEXT: je .LBB14_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: bsfl %ecx, %eax -; X32-NEXT: .LBB14_2: # %cond.end ; X32-NEXT: retl ; ; X64-LABEL: cttz_i32_zero_test: ; X64: # BB#0: -; X64-NEXT: movl $32, %eax ; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB14_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB14_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: bsfl %edi, %eax -; X64-NEXT: .LBB14_2: # %cond.end +; X64-NEXT: retq +; X64-NEXT: .LBB14_1: +; X64-NEXT: movl $32, %eax ; X64-NEXT: retq ; ; X32-CLZ-LABEL: cttz_i32_zero_test: @@ -642,25 +660,27 @@ define i32 @ctlz_i32_fold_cmov(i32 %n) { ; X32-LABEL: ctlz_i32_fold_cmov: ; X32: # BB#0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: orl $1, %ecx -; X32-NEXT: movl $32, %eax -; X32-NEXT: je .LBB16_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: bsrl %ecx, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl $1, %eax +; X32-NEXT: je .LBB16_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $31, %eax -; X32-NEXT: .LBB16_2: # %cond.end +; X32-NEXT: retl +; X32-NEXT: .LBB16_1: +; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; ; X64-LABEL: ctlz_i32_fold_cmov: ; X64: # BB#0: ; X64-NEXT: orl $1, %edi -; X64-NEXT: movl $32, %eax -; X64-NEXT: je .LBB16_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB16_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: xorl $31, %eax -; X64-NEXT: .LBB16_2: # %cond.end +; X64-NEXT: retq +; X64-NEXT: .LBB16_1: +; X64-NEXT: movl $32, %eax ; X64-NEXT: retq ; ; X32-CLZ-LABEL: ctlz_i32_fold_cmov: @@ -716,26 +736,30 @@ define i32 @ctlz_bsr_zero_test(i32 %n) { ; X32-LABEL: ctlz_bsr_zero_test: ; X32: # BB#0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32, %eax -; X32-NEXT: testl %ecx, %ecx -; X32-NEXT: je .LBB18_2 -; X32-NEXT: # BB#1: # %cond.false -; X32-NEXT: bsrl %ecx, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: testl 
%eax, %eax +; X32-NEXT: je .LBB18_1 +; X32-NEXT: # BB#2: # %cond.false +; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $31, %eax -; X32-NEXT: .LBB18_2: # %cond.end +; X32-NEXT: xorl $31, %eax +; X32-NEXT: retl +; X32-NEXT: .LBB18_1: +; X32-NEXT: movl $32, %eax ; X32-NEXT: xorl $31, %eax ; X32-NEXT: retl ; ; X64-LABEL: ctlz_bsr_zero_test: ; X64: # BB#0: -; X64-NEXT: movl $32, %eax ; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB18_2 -; X64-NEXT: # BB#1: # %cond.false +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # BB#2: # %cond.false ; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: xorl $31, %eax -; X64-NEXT: .LBB18_2: # %cond.end +; X64-NEXT: xorl $31, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: +; X64-NEXT: movl $32, %eax ; X64-NEXT: xorl $31, %eax ; X64-NEXT: retq ; Index: test/CodeGen/X86/phys_subreg_coalesce-2.ll =================================================================== --- test/CodeGen/X86/phys_subreg_coalesce-2.ll +++ test/CodeGen/X86/phys_subreg_coalesce-2.ll @@ -15,6 +15,7 @@ ret i32 0 ; CHECK: forbody{{$}} ; CHECK-NOT: mov +; CHECK: jbe forbody: ; preds = %forbody, %forcond.preheader %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; [#uses=3] %accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; [#uses=1] Index: test/CodeGen/X86/pr2659.ll =================================================================== --- test/CodeGen/X86/pr2659.ll +++ test/CodeGen/X86/pr2659.ll @@ -14,7 +14,7 @@ br i1 %cmp44, label %afterfor, label %forbody ; CHECK: %forcond.preheader -; CHECK: movl $1 +; CHECK: testl ; CHECK-NOT: xorl ; CHECK-NOT: movl ; CHECK-NOT: LBB