diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2542,6 +2542,13 @@ bool IsAdd = N->getOpcode() == ISD::ADD; SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); + + // Since we are looking for (srl x, N-1) which is either 0/1, we can ignore + // trunc / zero-extends as the value is equal either way. + while (ShiftOp.getOpcode() == ISD::TRUNCATE || + ShiftOp.getOpcode() == ISD::ZERO_EXTEND) + ShiftOp = ShiftOp.getOperand(0); + if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) || ShiftOp.getOpcode() != ISD::SRL) return SDValue(); @@ -2552,21 +2559,27 @@ return SDValue(); // The shift must be moving the sign bit to the least-significant-bit. - EVT VT = ShiftOp.getValueType(); + EVT ShiftVT = ShiftOp.getValueType(); SDValue ShAmt = ShiftOp.getOperand(1); ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); - if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1)) + if (!ShAmtC || ShAmtC->getAPIntValue() != (ShiftVT.getScalarSizeInBits() - 1)) return SDValue(); // Eliminate the 'not' by adjusting the shift and add/sub constant: // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) SDLoc DL(N); + EVT VT = N->getValueType(0); if (SDValue NewC = DAG.FoldConstantArithmetic( IsAdd ? ISD::ADD : ISD::SUB, DL, VT, {ConstantOp, DAG.getConstant(1, DL, VT)})) { - SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT, + SDValue NewShift = DAG.getNode(IsAdd ? 
ISD::SRA : ISD::SRL, DL, ShiftVT, Not.getOperand(0), ShAmt); + // The SRA result (0/-1) must be sign-extended to a wider type; the SRL + // result (0/1) is zero-extended. Truncation is exact for both. + if (ShiftVT != VT) + NewShift = DAG.getExtOrTrunc(/*IsSigned=*/IsAdd, NewShift, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC); } @@ -12691,19 +12704,28 @@ // setge X, C is canonicalized to setgt, so we do not need to match that // pattern. The setlt sibling is folded in SimplifySelectCC() because it does // not require the 'not' op. - if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { + // NOTE: We can perform this for ZEXT if shift type != ZEXT type as (srl (not + // X), N-1) is 0/1 so fits in any type. We avoid i1/i8 because we DON'T want + // to do this if we are directly returning the result of the setcc which goes + // into i1/i8. + if (CC == ISD::SETGT && isAllOnesConstant(Ones) && + (VT == XVT || + (VT.getSizeInBits() > 8 && N->getOpcode() == ISD::ZERO_EXTEND))) { // Invert and smear/shift the sign bit: // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) SDLoc DL(N); - unsigned ShCt = VT.getSizeInBits() - 1; + unsigned ShCt = XVT.getSizeInBits() - 1; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + if (!TLI.shouldAvoidTransformToShift(XVT, ShCt)) { - SDValue NotX = DAG.getNOT(DL, X, VT); - SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT); + SDValue NotX = DAG.getNOT(DL, X, XVT); + SDValue ShiftAmount = DAG.getConstant(ShCt, DL, XVT); auto ShiftOpcode = - N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; - return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + N->getOpcode() == ISD::SIGN_EXTEND ? 
ISD::SRA : ISD::SRL; + SDValue R = DAG.getNode(ShiftOpcode, DL, XVT, NotX, ShiftAmount); + if (VT != XVT) + R = DAG.getZExtOrTrunc(R, DL, VT); + return R; } } return SDValue(); diff --git a/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll b/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll --- a/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll @@ -8,8 +8,9 @@ define i32 @t1(i64 %a) { ; CHECK-LABEL: t1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, #63 -; CHECK-NEXT: eor w0, w8, #0x1 +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: lsr x0, x8, #63 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret ; %cmp = icmp sgt i64 %a, -1 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll @@ -1638,8 +1638,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __gekf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 @@ -1930,8 +1930,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __ltkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 @@ -2225,8 +2225,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __gekf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 @@ -2517,8 +2517,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __ltkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 diff --git 
a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -18,8 +18,8 @@ ; CHECK-LABEL: all_sign_bits_clear: ; CHECK: # %bb.0: ; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp sgt i32 %P, -1 %b = icmp sgt i32 %Q, -1 @@ -100,8 +100,8 @@ ; CHECK-LABEL: any_sign_bits_clear: ; CHECK: # %bb.0: ; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp sgt i32 %P, -1 %b = icmp sgt i32 %Q, -1 diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll --- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll +++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll @@ -6,8 +6,8 @@ define i32 @zext_ifpos(i32 %x) { ; CHECK-LABEL: zext_ifpos: ; CHECK: # %bb.0: +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -45,9 +45,9 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_tval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 -; CHECK-NEXT: addi 3, 3, 41 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: addi 3, 3, 42 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 42, i32 41 @@ -97,8 +97,7 @@ ; CHECK-LABEL: sel_ifpos_fval_bigger: ; CHECK: # %bb.0: ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 -; CHECK-NEXT: subfic 3, 3, 42 +; CHECK-NEXT: addi 3, 3, 41 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 41, i32 42 diff --git a/llvm/test/CodeGen/RISCV/select-constant-xor.ll b/llvm/test/CodeGen/RISCV/select-constant-xor.ll --- a/llvm/test/CodeGen/RISCV/select-constant-xor.ll +++ 
b/llvm/test/CodeGen/RISCV/select-constant-xor.ll @@ -48,10 +48,10 @@ define i32 @selecti64i32(i64 %a) { ; RV32-LABEL: selecti64i32: ; RV32: # %bb.0: -; RV32-NEXT: slti a0, a1, 0 -; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: srli a0, a1, 31 ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: selecti64i32: diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll @@ -108,8 +108,8 @@ ; ; RV64-LABEL: pos_sel_special_constant: ; RV64: # %bb.0: -; RV64-NEXT: slti a0, a0, 0 -; RV64-NEXT: xori a0, a0, 1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srliw a0, a0, 31 ; RV64-NEXT: slli a0, a0, 9 ; RV64-NEXT: ret %tmp.1 = icmp sgt i32 %a, -1 diff --git a/llvm/test/CodeGen/X86/icmp-opt.ll b/llvm/test/CodeGen/X86/icmp-opt.ll --- a/llvm/test/CodeGen/X86/icmp-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-opt.ll @@ -10,16 +10,18 @@ define i32 @t1(i64 %a) { ; CHECK-NOBMI-LABEL: t1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %eax, %eax -; CHECK-NOBMI-NEXT: testq %rdi, %rdi -; CHECK-NOBMI-NEXT: setns %al +; CHECK-NOBMI-NEXT: movq %rdi, %rax +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: shrq $63, %rax +; CHECK-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: t1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %eax, %eax -; CHECK-BMI-NEXT: testq %rdi, %rdi -; CHECK-BMI-NEXT: setns %al +; CHECK-BMI-NEXT: movq %rdi, %rax +; CHECK-BMI-NEXT: notq %rax +; CHECK-BMI-NEXT: shrq $63, %rax +; CHECK-BMI-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-BMI-NEXT: retq %cmp = icmp sgt i64 %a, -1 %conv = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/X86/select-constant-xor.ll b/llvm/test/CodeGen/X86/select-constant-xor.ll --- a/llvm/test/CodeGen/X86/select-constant-xor.ll +++ 
b/llvm/test/CodeGen/X86/select-constant-xor.ll @@ -47,20 +47,15 @@ define i32 @selecti64i32(i64 %a) { ; X86-LABEL: selecti64i32: ; X86: # %bb.0: -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %cl -; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $31, %eax +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: retl ; ; X64-LABEL: selecti64i32: ; X64: # %bb.0: -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: setns %cl -; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: subl %ecx, %eax +; X64-NEXT: shrq $63, %rdi +; X64-NEXT: leal 2147483647(%rdi), %eax ; X64-NEXT: retq %c = icmp sgt i64 %a, -1 %s = select i1 %c, i32 2147483647, i32 -2147483648 @@ -260,20 +255,16 @@ define i32 @selecti16i32_offby1(i16 %a) { ; X86-LABEL: selecti16i32_offby1: ; X86: # %bb.0: -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpw $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %cl -; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $15, %eax +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: retl ; ; X64-LABEL: selecti16i32_offby1: ; X64: # %bb.0: -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testw %di, %di -; X64-NEXT: setns %cl -; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: subl %ecx, %eax +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: shrl $15, %eax +; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X64-NEXT: retq %c = icmp sgt i16 %a, -1 %s = select i1 %c, i32 2147483647, i32 -2147483648 @@ -283,20 +274,17 @@ define i32 @selecti8i32_offby1(i8 %a) { ; X86-LABEL: selecti8i32_offby1: ; X86: # %bb.0: -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %cl -; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax +; X86-NEXT: shrb $7, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: retl ; ; X64-LABEL: selecti8i32_offby1: ; X64: # %bb.0: -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testb %dil, %dil -; X64-NEXT: setns %cl -; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: subl %ecx, %eax +; X64-NEXT: shrb $7, %dil +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X64-NEXT: retq %c = icmp sgt i8 %a, -1 %s = select i1 %c, i32 2147483647, i32 -2147483648 diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -393,22 +393,22 @@ define x86_fp80 @test7(i32 %tmp8) nounwind { ; GENERIC-LABEL: test7: ; GENERIC: ## %bb.0: -; GENERIC-NEXT: xorl %eax, %eax -; GENERIC-NEXT: testl %edi, %edi -; GENERIC-NEXT: setns %al -; GENERIC-NEXT: shlq $4, %rax -; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx -; GENERIC-NEXT: fldt (%rax,%rcx) +; GENERIC-NEXT: ## kill: def $edi killed $edi def $rdi +; GENERIC-NEXT: notl %edi +; GENERIC-NEXT: shrl $31, %edi +; GENERIC-NEXT: shlq $4, %rdi +; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; GENERIC-NEXT: fldt (%rdi,%rax) ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test7: ; ATOM: ## %bb.0: -; ATOM-NEXT: xorl %eax, %eax -; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx -; ATOM-NEXT: testl %edi, %edi -; ATOM-NEXT: setns %al -; ATOM-NEXT: shlq $4, %rax -; ATOM-NEXT: fldt (%rax,%rcx) +; ATOM-NEXT: ## kill: def $edi killed $edi def $rdi +; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; ATOM-NEXT: notl %edi +; ATOM-NEXT: shrl $31, %edi +; ATOM-NEXT: shlq $4, %rdi +; ATOM-NEXT: fldt (%rdi,%rax) ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test7: