Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8099,6 +8099,37 @@ return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, + bool LegalOperations) { + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); + + SDValue SetCC = N->getOperand(0); + if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || + !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) + return SDValue(); + + SDValue X = SetCC.getOperand(0); + SDValue Ones = SetCC.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + EVT VT = N->getValueType(0); + EVT XVT = X.getValueType(); + // setge X, C is canonicalized to setgt, so we do not need to match that + // pattern. The setlt sibling is folded in SimplifySelectCC() because it does + // not require the 'not' op. + if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { + // Invert and smear/shift the sign bit: + // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) + // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) + SDLoc DL(N); + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); + auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? 
ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + } + return SDValue(); +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -8224,6 +8255,9 @@ } } + if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) + return V; + if (N0.getOpcode() == ISD::SETCC) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); @@ -8510,6 +8544,9 @@ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD)) return foldedExt; + if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) + return V; + if (N0.getOpcode() == ISD::SETCC) { // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && Index: llvm/trunk/test/CodeGen/AArch64/signbit-shift.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/signbit-shift.ll +++ llvm/trunk/test/CodeGen/AArch64/signbit-shift.ll @@ -6,8 +6,8 @@ define i32 @zext_ifpos(i32 %x) { ; CHECK-LABEL: zext_ifpos: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, #31 -; CHECK-NEXT: eor w0, w8, #0x1 +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: lsr w0, w8, #31 ; CHECK-NEXT: ret %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -17,8 +17,8 @@ define i32 @add_zext_ifpos(i32 %x) { ; CHECK-LABEL: add_zext_ifpos: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, #31 -; CHECK-NEXT: eor w8, w8, #0x1 +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: lsr w8, w8, #31 ; CHECK-NEXT: add w0, w8, #41 // =41 ; CHECK-NEXT: ret %c = icmp sgt i32 %x, -1 @@ -42,8 +42,8 @@ define i32 @sext_ifpos(i32 %x) { ; CHECK-LABEL: sext_ifpos: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: eor w0, w8, w0, asr #31 +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: asr w0, w8, #31 ; CHECK-NEXT: ret %c = icmp sgt i32 %x, -1 %e = sext i1 %c to i32 @@ -53,10 +53,9 @@ define i32 @add_sext_ifpos(i32 %x) { ; CHECK-LABEL: add_sext_ifpos: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, #31 -; CHECK-NEXT: eor w8, w8, 
#0x1 +; CHECK-NEXT: mvn w8, w0 ; CHECK-NEXT: mov w9, #42 -; CHECK-NEXT: sub w0, w9, w8 +; CHECK-NEXT: sub w0, w9, w8, lsr #31 ; CHECK-NEXT: ret %c = icmp sgt i32 %x, -1 %e = sext i1 %c to i32 Index: llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll +++ llvm/trunk/test/CodeGen/AMDGPU/commute-compares.ll @@ -114,7 +114,7 @@ } ; GCN-LABEL: {{^}}commute_sgt_neg1_i32: -; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}} +; GCN: v_ashrrev_i32_e32 v2, 31, v2 define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid Index: llvm/trunk/test/CodeGen/PowerPC/signbit-shift.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/signbit-shift.ll +++ llvm/trunk/test/CodeGen/PowerPC/signbit-shift.ll @@ -17,8 +17,8 @@ define i32 @add_zext_ifpos(i32 %x) { ; CHECK-LABEL: add_zext_ifpos: ; CHECK: # %bb.0: +; CHECK-NEXT: nor 3, 3, 3 ; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: addi 3, 3, 41 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 @@ -54,8 +54,8 @@ define i32 @add_sext_ifpos(i32 %x) { ; CHECK-LABEL: add_sext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: srawi 3, 3, 31 ; CHECK-NEXT: nor 3, 3, 3 +; CHECK-NEXT: srawi 3, 3, 31 ; CHECK-NEXT: addi 3, 3, 42 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 Index: llvm/trunk/test/CodeGen/PowerPC/testComparesigesll.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/testComparesigesll.ll +++ llvm/trunk/test/CodeGen/PowerPC/testComparesigesll.ll @@ -99,9 +99,9 @@ ; CHECK-LABEL: test_igesll_z_store: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: not r3, r3 ; CHECK-NEXT: ld r4, .LC0@toc@l(r4) -; 
CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: rldicl r3, r3, 1, 63 ; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: blr entry: @@ -115,9 +115,9 @@ ; CHECK-LABEL: test_igesll_sext_z_store: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-NEXT: sradi r3, r3, 63 -; CHECK-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-NEXT: not r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: sradi r3, r3, 63 ; CHECK-NEXT: std r3, ; CHECK-NEXT: blr entry: Index: llvm/trunk/test/CodeGen/PowerPC/testComparesllgesll.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/testComparesllgesll.ll +++ llvm/trunk/test/CodeGen/PowerPC/testComparesllgesll.ll @@ -39,8 +39,8 @@ define i64 @test_llgesll_z(i64 %a) { ; CHECK-LABEL: test_llgesll_z: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: not r3, r3 ; CHECK-NEXT: rldicl r3, r3, 1, 63 -; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: blr entry: %cmp = icmp sgt i64 %a, -1 @@ -51,8 +51,8 @@ define i64 @test_llgesll_sext_z(i64 %a) { ; CHECK-LABEL: test_llgesll_sext_z: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sradi r3, r3, 63 ; CHECK-NEXT: not r3, r3 +; CHECK-NEXT: sradi r3, r3, 63 ; CHECK-NEXT: blr entry: %cmp = icmp sgt i64 %a, -1 @@ -99,9 +99,9 @@ ; CHECK-LABEL: test_llgesll_z_store: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: not r3, r3 ; CHECK-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: rldicl r3, r3, 1, 63 ; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: blr entry: @@ -115,9 +115,9 @@ ; CHECK-LABEL: test_llgesll_sext_z_store: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-NEXT: sradi r3, r3, 63 -; CHECK-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-NEXT: not r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: sradi r3, r3, 63 ; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: blr entry: Index: llvm/trunk/test/CodeGen/X86/select.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/select.ll +++ llvm/trunk/test/CodeGen/X86/select.ll @@ -297,11 +297,10 @@ ; ; MCU-LABEL: test7: ; MCU: # %bb.0: -; MCU-NEXT: xorl %ecx, %ecx -; MCU-NEXT: testl %eax, %eax -; MCU-NEXT: setns %cl -; MCU-NEXT: shll $4, %ecx -; MCU-NEXT: fldt {{\.LCPI.*}}(%ecx) +; MCU-NEXT: notl %eax +; MCU-NEXT: shrl $27, %eax +; MCU-NEXT: andl $-16, %eax +; MCU-NEXT: fldt {{\.LCPI.*}}(%eax) ; MCU-NEXT: retl %tmp9 = icmp sgt i32 %tmp8, -1 %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000 Index: llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll +++ llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll @@ -93,18 +93,18 @@ define i32 @pos_sel_constants(i32 %a) { ; CHECK-NOBMI-LABEL: pos_sel_constants: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %eax, %eax -; CHECK-NOBMI-NEXT: testl %edi, %edi -; CHECK-NOBMI-NEXT: setns %al -; CHECK-NOBMI-NEXT: leal (%rax,%rax,4), %eax +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: notl %edi +; CHECK-NOBMI-NEXT: shrl $31, %edi +; CHECK-NOBMI-NEXT: leal (%rdi,%rdi,4), %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: pos_sel_constants: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %eax, %eax -; CHECK-BMI-NEXT: testl %edi, %edi -; CHECK-BMI-NEXT: setns %al -; CHECK-BMI-NEXT: leal (%rax,%rax,4), %eax +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: notl %edi +; CHECK-BMI-NEXT: shrl $31, %edi +; CHECK-BMI-NEXT: leal (%rdi,%rdi,4), %eax ; CHECK-BMI-NEXT: retq %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 5, i32 0 @@ -116,18 +116,18 @@ define i32 @pos_sel_special_constant(i32 %a) { ; CHECK-NOBMI-LABEL: pos_sel_special_constant: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %eax, %eax -; CHECK-NOBMI-NEXT: testl 
%edi, %edi -; CHECK-NOBMI-NEXT: setns %al -; CHECK-NOBMI-NEXT: shll $9, %eax +; CHECK-NOBMI-NEXT: notl %edi +; CHECK-NOBMI-NEXT: shrl $22, %edi +; CHECK-NOBMI-NEXT: andl $512, %edi # imm = 0x200 +; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: pos_sel_special_constant: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %eax, %eax -; CHECK-BMI-NEXT: testl %edi, %edi -; CHECK-BMI-NEXT: setns %al -; CHECK-BMI-NEXT: shll $9, %eax +; CHECK-BMI-NEXT: notl %edi +; CHECK-BMI-NEXT: shrl $22, %edi +; CHECK-BMI-NEXT: andl $512, %edi # imm = 0x200 +; CHECK-BMI-NEXT: movl %edi, %eax ; CHECK-BMI-NEXT: retq %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 512, i32 0 Index: llvm/trunk/test/CodeGen/X86/signbit-shift.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/signbit-shift.ll +++ llvm/trunk/test/CodeGen/X86/signbit-shift.ll @@ -6,9 +6,9 @@ define i32 @zext_ifpos(i32 %x) { ; CHECK-LABEL: zext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: setns %al +; CHECK-NEXT: notl %edi +; CHECK-NEXT: shrl $31, %edi +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -18,10 +18,10 @@ define i32 @add_zext_ifpos(i32 %x) { ; CHECK-LABEL: add_zext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: setns %al -; CHECK-NEXT: addl $41, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: notl %edi +; CHECK-NEXT: shrl $31, %edi +; CHECK-NEXT: leal 41(%rdi), %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -32,10 +32,10 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_tval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: setns %al -; CHECK-NEXT: addl $41, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: notl 
%edi +; CHECK-NEXT: shrl $31, %edi +; CHECK-NEXT: leal 41(%rdi), %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 42, i32 41 @@ -45,10 +45,9 @@ define i32 @sext_ifpos(i32 %x) { ; CHECK-LABEL: sext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: setns %al -; CHECK-NEXT: negl %eax +; CHECK-NEXT: notl %edi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = sext i1 %c to i32 @@ -58,11 +57,10 @@ define i32 @add_sext_ifpos(i32 %x) { ; CHECK-LABEL: add_sext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: setns %cl +; CHECK-NEXT: notl %edi +; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: subl %edi, %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = sext i1 %c to i32 @@ -73,11 +71,10 @@ define i32 @sel_ifpos_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_fval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: setns %cl +; CHECK-NEXT: notl %edi +; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: subl %edi, %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 41, i32 42 Index: llvm/trunk/test/CodeGen/XCore/ashr.ll =================================================================== --- llvm/trunk/test/CodeGen/XCore/ashr.ll +++ llvm/trunk/test/CodeGen/XCore/ashr.ll @@ -72,5 +72,6 @@ ret i32 %2 } ; CHECK-LABEL: f5: -; CHECK-NEXT: ashr r0, r0, 32 -; CHECK-NEXT: eq r0, r0, 0 +; CHECK-NEXT: not r0, r0 +; CHECK-NEXT: mkmsk r1, 5 +; CHECK-NEXT: shr r0, r0, r1