Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2743,6 +2743,17 @@ } } + // Prefer an add for more folding potential and possibly better codegen: + // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) + if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { + SDValue ShAmt = N1.getOperand(1); + ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); + if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) { + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); + return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); + } + } + return SDValue(); } Index: llvm/trunk/test/CodeGen/AArch64/signbit-shift.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/signbit-shift.ll +++ llvm/trunk/test/CodeGen/AArch64/signbit-shift.ll @@ -150,8 +150,8 @@ define i32 @add_sext_ifneg(i32 %x) { ; CHECK-LABEL: add_sext_ifneg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: sub w0, w8, w0, lsr #31 +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #42 // =42 ; CHECK-NEXT: ret %c = icmp slt i32 %x, 0 %e = sext i1 %c to i32 @@ -225,7 +225,7 @@ define i32 @sub_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: sub_lshr: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w0, w1, w0, lsr #31 +; CHECK-NEXT: add w0, w1, w0, asr #31 ; CHECK-NEXT: ret %sh = lshr i32 %x, 31 %r = sub i32 %y, %sh @@ -235,8 +235,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: sub_lshr_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushr v0.4s, v0.4s, #31 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ssra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> %y, %sh @@ -246,8 +246,8 @@ define i32 @sub_const_op_lshr(i32 %x) { ; 
CHECK-LABEL: sub_const_op_lshr: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: sub w0, w8, w0, lsr #31 +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #43 // =43 ; CHECK-NEXT: ret %sh = lshr i32 %x, 31 %r = sub i32 43, %sh @@ -257,9 +257,9 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) { ; CHECK-LABEL: sub_const_op_lshr_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushr v0.4s, v0.4s, #31 ; CHECK-NEXT: movi v1.4s, #42 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ssra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh Index: llvm/trunk/test/CodeGen/PowerPC/signbit-shift.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/signbit-shift.ll +++ llvm/trunk/test/CodeGen/PowerPC/signbit-shift.ll @@ -243,8 +243,8 @@ define i32 @sub_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: sub_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: subf 3, 3, 4 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: add 3, 4, 3 ; CHECK-NEXT: blr %sh = lshr i32 %x, 31 %r = sub i32 %y, %sh @@ -257,8 +257,8 @@ ; CHECK-NEXT: vspltisw 4, -16 ; CHECK-NEXT: vspltisw 5, 15 ; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: vsrw 2, 2, 4 -; CHECK-NEXT: vsubuwm 2, 3, 2 +; CHECK-NEXT: vsraw 2, 2, 4 +; CHECK-NEXT: vadduwm 2, 3, 2 ; CHECK-NEXT: blr %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> %y, %sh @@ -268,8 +268,8 @@ define i32 @sub_const_op_lshr(i32 %x) { ; CHECK-LABEL: sub_const_op_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 43 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: addi 3, 3, 43 ; CHECK-NEXT: blr %sh = lshr i32 %x, 31 %r = sub i32 43, %sh @@ -284,9 +284,9 @@ ; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l ; CHECK-NEXT: vsubuwm 3, 4, 3 -; CHECK-NEXT: vsrw 2, 2, 3 +; CHECK-NEXT: vsraw 2, 2, 3 ; CHECK-NEXT: lvx 3, 0, 3 -; CHECK-NEXT: vsubuwm 2, 3, 2 +; CHECK-NEXT: vadduwm 2, 
2, 3 ; CHECK-NEXT: blr %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh Index: llvm/trunk/test/CodeGen/X86/signbit-shift.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/signbit-shift.ll +++ llvm/trunk/test/CodeGen/X86/signbit-shift.ll @@ -156,9 +156,9 @@ define i32 @add_sext_ifneg(i32 %x) { ; CHECK-LABEL: add_sext_ifneg: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 0 %e = sext i1 %c to i32 @@ -169,9 +169,9 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifneg_fval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 0 %r = select i1 %c, i32 41, i32 42 @@ -231,9 +231,10 @@ define i32 @sub_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: sub_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: subl %edi, %esi -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal (%rdi,%rsi), %eax ; CHECK-NEXT: retq %sh = lshr i32 %x, 31 %r = sub i32 %y, %sh @@ -243,9 +244,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: sub_lshr_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: psrld $31, %xmm0 -; CHECK-NEXT: psubd %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 +; CHECK-NEXT: paddd %xmm1, %xmm0 ; CHECK-NEXT: retq %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> %y, %sh @@ -255,9 +255,9 @@ define i32 @sub_const_op_lshr(i32 %x) { ; CHECK-LABEL: sub_const_op_lshr: ; CHECK: # %bb.0: -; 
CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: xorl $43, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 43(%rdi), %eax ; CHECK-NEXT: retq %sh = lshr i32 %x, 31 %r = sub i32 43, %sh @@ -267,10 +267,8 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) { ; CHECK-LABEL: sub_const_op_lshr_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: psrld $31, %xmm0 -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42] -; CHECK-NEXT: psubd %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 +; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh