diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3137,7 +3137,8 @@ /// Return true if creating a shift of the type by the given /// amount is not profitable. - virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const { + virtual bool shouldAvoidTransformToShift(const SDNode *N, EVT VT, + unsigned Amount) const { return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2549,6 +2549,13 @@ bool IsAdd = N->getOpcode() == ISD::ADD; SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); + + // Since we are looking for (srl x, N-1) which is either 0/1, we can ignore + // trunc / zero-extends as the value is equal either way. + while (ShiftOp.getOpcode() == ISD::TRUNCATE || + ShiftOp.getOpcode() == ISD::ZERO_EXTEND) + ShiftOp = ShiftOp.getOperand(0); + if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) || ShiftOp.getOpcode() != ISD::SRL) return SDValue(); @@ -2559,21 +2566,24 @@ return SDValue(); // The shift must be moving the sign bit to the least-significant-bit. 
- EVT VT = ShiftOp.getValueType(); + EVT ShiftVT = ShiftOp.getValueType(); SDValue ShAmt = ShiftOp.getOperand(1); ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); - if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1)) + if (!ShAmtC || ShAmtC->getAPIntValue() != (ShiftVT.getScalarSizeInBits() - 1)) return SDValue(); // Eliminate the 'not' by adjusting the shift and add/sub constant: // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) SDLoc DL(N); + EVT VT = N->getValueType(0); if (SDValue NewC = DAG.FoldConstantArithmetic( IsAdd ? ISD::ADD : ISD::SUB, DL, VT, {ConstantOp, DAG.getConstant(1, DL, VT)})) { - SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT, + SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, ShiftVT, Not.getOperand(0), ShAmt); + NewShift = IsAdd ? DAG.getSExtOrTrunc(NewShift, DL, VT) + : DAG.getZExtOrTrunc(NewShift, DL, VT); return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC); } @@ -12785,19 +12795,26 @@ // setge X, C is canonicalized to setgt, so we do not need to match that // pattern. The setlt sibling is folded in SimplifySelectCC() because it does // not require the 'not' op. - if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { + // NOTE: We can perform this for ZEXT if shift type != ZEXT type as (srl (not + // X), N-1) is 0/1 so fits in any type. We avoid i1/i8 because we DON'T want + // to do this if we are directly returning the result of the setcc which goes + // into i1/i8. 
+ if (CC == ISD::SETGT && isAllOnesConstant(Ones) && + (VT == XVT || + (N->getOpcode() == ISD::ZERO_EXTEND))) { // Invert and smear/shift the sign bit: // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) SDLoc DL(N); - unsigned ShCt = VT.getSizeInBits() - 1; + unsigned ShCt = XVT.getSizeInBits() - 1; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { - SDValue NotX = DAG.getNOT(DL, X, VT); - SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT); + if (!TLI.shouldAvoidTransformToShift(N, VT, ShCt)) { + SDValue NotX = DAG.getNOT(DL, X, XVT); + SDValue ShiftAmount = DAG.getConstant(ShCt, DL, XVT); auto ShiftOpcode = - N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; - return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; + SDValue R = DAG.getNode(ShiftOpcode, DL, XVT, NotX, ShiftAmount); + return DAG.getZExtOrTrunc(R, DL, VT); } } return SDValue(); @@ -26032,7 +26049,7 @@ auto *N2C = dyn_cast(N2.getNode()); if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; - if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) { + if (!TLI.shouldAvoidTransformToShift(nullptr, XType, ShCt)) { SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); @@ -26050,7 +26067,7 @@ } unsigned ShCt = XType.getSizeInBits() - 1; - if (TLI.shouldAvoidTransformToShift(XType, ShCt)) + if (TLI.shouldAvoidTransformToShift(nullptr, XType, ShCt)) return SDValue(); SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); @@ -26263,7 +26280,7 @@ // Shift the tested bit over the sign bit. 
const APInt &AndMask = ConstAndRHS->getAPIntValue(); unsigned ShCt = AndMask.getBitWidth() - 1; - if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + if (!TLI.shouldAvoidTransformToShift(nullptr, VT, ShCt)) { SDValue ShlAmt = DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); @@ -26320,7 +26337,7 @@ return Temp; unsigned ShCt = N2C->getAPIntValue().logBase2(); - if (TLI.shouldAvoidTransformToShift(VT, ShCt)) + if (TLI.shouldAvoidTransformToShift(nullptr, VT, ShCt)) return SDValue(); // shl setcc result by log2 n2c @@ -26370,7 +26387,8 @@ N2C->getAPIntValue() == ~N3C->getAPIntValue() && ((N1C->isAllOnes() && CC == ISD::SETGT) || (N1C->isZero() && CC == ISD::SETLT)) && - !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { + !TLI.shouldAvoidTransformToShift(nullptr, VT, + CmpOpVT.getScalarSizeInBits() - 1)) { SDValue ASR = DAG.getNode( ISD::SRA, DL, CmpOpVT, N0, DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4840,7 +4840,7 @@ // Perform the xform if the AND RHS is a single bit. unsigned ShCt = AndRHS->getAPIntValue().logBase2(); if (AndRHS->getAPIntValue().isPowerOf2() && - !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { + !TLI.shouldAvoidTransformToShift(nullptr, ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -4850,7 +4850,7 @@ // Perform the xform if C1 is a single bit. 
unsigned ShCt = C1.logBase2(); if (C1.isPowerOf2() && - !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { + !TLI.shouldAvoidTransformToShift(nullptr, ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -4869,7 +4869,7 @@ const APInt &AndRHSC = AndRHS->getAPIntValue(); if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countr_zero(); - if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + if (!TLI.shouldAvoidTransformToShift(nullptr, ShValTy, ShiftBits)) { SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0), DAG.getConstant(ShiftBits, dl, ShiftTy)); @@ -4898,7 +4898,7 @@ NewC.lshrInPlace(ShiftBits); if (ShiftBits && NewC.getSignificantBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue()) && - !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + !TLI.shouldAvoidTransformToShift(nullptr, ShValTy, ShiftBits)) { SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/llvm/lib/Target/MSP430/MSP430ISelLowering.h --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.h +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.h @@ -128,7 +128,8 @@ bool isZExtFree(EVT VT1, EVT VT2) const override; bool isLegalICmpImmediate(int64_t) const override; - bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const override; + bool shouldAvoidTransformToShift(const SDNode *N, EVT VT, + unsigned Amount) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -358,9 +358,10 @@ } // Define non profitable transforms into shifts -bool 
MSP430TargetLowering::shouldAvoidTransformToShift(EVT VT, +bool MSP430TargetLowering::shouldAvoidTransformToShift(const SDNode *N, EVT VT, unsigned Amount) const { + (void)N; - return !(Amount == 8 || Amount == 9 || Amount<=2); + return !(Amount == 8 || Amount == 9 || Amount <= 2); } // Implemented to verify test case assertions in diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1157,6 +1157,14 @@ bool shouldSplatInsEltVarIndex(EVT VT) const override; + bool shouldAvoidTransformToShift(const SDNode *N, EVT VT, + unsigned Amount) const override { + if (N == nullptr || N->getOpcode() != ISD::ZERO_EXTEND || + VT.getSizeInBits() > 8) + return false; + return true; + } + bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override { // Converting to sat variants holds little benefit on X86 as we will just // need to saturate the value back using fp arithmatic. 
diff --git a/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll b/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll --- a/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-icmp-opt.ll @@ -8,8 +8,9 @@ define i32 @t1(i64 %a) { ; CHECK-LABEL: t1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, #63 -; CHECK-NEXT: eor w0, w8, #0x1 +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: lsr x0, x8, #63 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret ; %cmp = icmp sgt i64 %a, -1 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll @@ -1638,8 +1638,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __gekf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 @@ -1930,8 +1930,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __ltkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 @@ -2225,8 +2225,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __gekf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 @@ -2517,8 +2517,8 @@ ; NOVSX-NEXT: std r0, 48(r1) ; NOVSX-NEXT: bl __ltkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: not r3, r3 ; NOVSX-NEXT: rlwinm r3, r3, 1, 31, 31 -; NOVSX-NEXT: xori r3, r3, 1 ; NOVSX-NEXT: addi r1, r1, 32 ; NOVSX-NEXT: ld r0, 16(r1) ; NOVSX-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -18,8 +18,8 @@ ; 
CHECK-LABEL: all_sign_bits_clear: ; CHECK: # %bb.0: ; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp sgt i32 %P, -1 %b = icmp sgt i32 %Q, -1 @@ -100,8 +100,8 @@ ; CHECK-LABEL: any_sign_bits_clear: ; CHECK: # %bb.0: ; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp sgt i32 %P, -1 %b = icmp sgt i32 %Q, -1 diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll --- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll +++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll @@ -6,8 +6,8 @@ define i32 @zext_ifpos(i32 %x) { ; CHECK-LABEL: zext_ifpos: ; CHECK: # %bb.0: +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -45,9 +45,8 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_tval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 -; CHECK-NEXT: addi 3, 3, 41 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: addi 3, 3, 42 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 42, i32 41 @@ -97,8 +96,7 @@ ; CHECK-LABEL: sel_ifpos_fval_bigger: ; CHECK: # %bb.0: ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 -; CHECK-NEXT: xori 3, 3, 1 -; CHECK-NEXT: subfic 3, 3, 42 +; CHECK-NEXT: addi 3, 3, 41 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 41, i32 42 diff --git a/llvm/test/CodeGen/RISCV/select-constant-xor.ll b/llvm/test/CodeGen/RISCV/select-constant-xor.ll --- a/llvm/test/CodeGen/RISCV/select-constant-xor.ll +++ b/llvm/test/CodeGen/RISCV/select-constant-xor.ll @@ -48,10 +48,10 @@ define i32 @selecti64i32(i64 %a) { ; RV32-LABEL: selecti64i32: ; RV32: # %bb.0: -; RV32-NEXT: slti a0, a1, 0 -; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: srli a0, a1, 31 ; RV32-NEXT: lui a1, 524288 -; 
RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: selecti64i32: diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll @@ -108,8 +108,8 @@ ; ; RV64-LABEL: pos_sel_special_constant: ; RV64: # %bb.0: -; RV64-NEXT: slti a0, a0, 0 -; RV64-NEXT: xori a0, a0, 1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srliw a0, a0, 31 ; RV64-NEXT: slli a0, a0, 9 ; RV64-NEXT: ret %tmp.1 = icmp sgt i32 %a, -1 diff --git a/llvm/test/CodeGen/X86/icmp-opt.ll b/llvm/test/CodeGen/X86/icmp-opt.ll --- a/llvm/test/CodeGen/X86/icmp-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-opt.ll @@ -10,16 +10,18 @@ define i32 @t1(i64 %a) { ; CHECK-NOBMI-LABEL: t1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %eax, %eax -; CHECK-NOBMI-NEXT: testq %rdi, %rdi -; CHECK-NOBMI-NEXT: setns %al +; CHECK-NOBMI-NEXT: movq %rdi, %rax +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: shrq $63, %rax +; CHECK-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: t1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %eax, %eax -; CHECK-BMI-NEXT: testq %rdi, %rdi -; CHECK-BMI-NEXT: setns %al +; CHECK-BMI-NEXT: movq %rdi, %rax +; CHECK-BMI-NEXT: notq %rax +; CHECK-BMI-NEXT: shrq $63, %rax +; CHECK-BMI-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-BMI-NEXT: retq %cmp = icmp sgt i64 %a, -1 %conv = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/X86/select-constant-xor.ll b/llvm/test/CodeGen/X86/select-constant-xor.ll --- a/llvm/test/CodeGen/X86/select-constant-xor.ll +++ b/llvm/test/CodeGen/X86/select-constant-xor.ll @@ -47,18 +47,15 @@ define i32 @selecti64i32(i64 %a) { ; X86-LABEL: selecti64i32: ; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: sets %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; 
X86-NEXT: shrl $31, %eax ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: retl ; ; X64-LABEL: selecti64i32: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: sets %al -; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: shrq $63, %rdi +; X64-NEXT: leal 2147483647(%rdi), %eax ; X64-NEXT: retq %c = icmp sgt i64 %a, -1 %s = select i1 %c, i32 2147483647, i32 -2147483648 @@ -259,17 +256,15 @@ define i32 @selecti16i32_offby1(i16 %a) { ; X86-LABEL: selecti16i32_offby1: ; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw $0, {{[0-9]+}}(%esp) -; X86-NEXT: sets %al +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $15, %eax ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: retl ; ; X64-LABEL: selecti16i32_offby1: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testw %di, %di -; X64-NEXT: sets %al +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: shrl $15, %eax ; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X64-NEXT: retq %c = icmp sgt i16 %a, -1 @@ -280,17 +275,16 @@ define i32 @selecti8i32_offby1(i8 %a) { ; X86-LABEL: selecti8i32_offby1: ; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) -; X86-NEXT: sets %al +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrb $7, %al +; X86-NEXT: movzbl %al, %eax ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: retl ; ; X64-LABEL: selecti8i32_offby1: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testb %dil, %dil -; X64-NEXT: sets %al +; X64-NEXT: shrb $7, %dil +; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; X64-NEXT: retq %c = icmp sgt i8 %a, -1 @@ -302,18 +296,15 @@ define i32 @selecti64i32_offby1_add(i64 %a) { ; X86-LABEL: selecti64i32_offby1_add: ; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %al -; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: addl $-2147483648, %eax # imm = 0x80000000 ; X86-NEXT: retl ; ; X64-LABEL: selecti64i32_offby1_add: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: setns %al -; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: sarq $63, %rdi +; X64-NEXT: leal -2147483648(%rdi), %eax ; X64-NEXT: retq %c = icmp sgt i64 %a, -1 %s = select i1 %c, i32 2147483648, i32 2147483647 @@ -323,18 +314,16 @@ define i32 @selecti16i32_offby1_add(i16 %a) { ; X86-LABEL: selecti16i32_offby1_add: ; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %al -; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarl $15, %eax +; X86-NEXT: addl $-2147483648, %eax # imm = 0x80000000 ; X86-NEXT: retl ; ; X64-LABEL: selecti16i32_offby1_add: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testw %di, %di -; X64-NEXT: setns %al -; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: movswl %di, %eax +; X64-NEXT: sarl $15, %eax +; X64-NEXT: addl $-2147483648, %eax # imm = 0x80000000 ; X64-NEXT: retq %c = icmp sgt i16 %a, -1 %s = select i1 %c, i32 2147483648, i32 2147483647 @@ -344,18 +333,17 @@ define i32 @selecti8i32_offby1_add(i8 %a) { ; X86-LABEL: selecti8i32_offby1_add: ; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %al -; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarb $7, %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: addl $-2147483648, %eax # imm = 0x80000000 ; X86-NEXT: retl ; ; X64-LABEL: selecti8i32_offby1_add: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testb %dil, %dil -; X64-NEXT: setns %al -; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: sarb $7, %dil +; X64-NEXT: movsbl %dil, %eax +; 
X64-NEXT: addl $-2147483648, %eax # imm = 0x80000000 ; X64-NEXT: retq %c = icmp sgt i8 %a, -1 %s = select i1 %c, i32 2147483648, i32 2147483647 diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -393,22 +393,22 @@ define x86_fp80 @test7(i32 %tmp8) nounwind { ; GENERIC-LABEL: test7: ; GENERIC: ## %bb.0: -; GENERIC-NEXT: xorl %eax, %eax -; GENERIC-NEXT: testl %edi, %edi -; GENERIC-NEXT: setns %al -; GENERIC-NEXT: shlq $4, %rax -; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx -; GENERIC-NEXT: fldt (%rax,%rcx) +; GENERIC-NEXT: ## kill: def $edi killed $edi def $rdi +; GENERIC-NEXT: notl %edi +; GENERIC-NEXT: shrl $31, %edi +; GENERIC-NEXT: shlq $4, %rdi +; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; GENERIC-NEXT: fldt (%rdi,%rax) ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test7: ; ATOM: ## %bb.0: -; ATOM-NEXT: xorl %eax, %eax -; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx -; ATOM-NEXT: testl %edi, %edi -; ATOM-NEXT: setns %al -; ATOM-NEXT: shlq $4, %rax -; ATOM-NEXT: fldt (%rax,%rcx) +; ATOM-NEXT: ## kill: def $edi killed $edi def $rdi +; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; ATOM-NEXT: notl %edi +; ATOM-NEXT: shrl $31, %edi +; ATOM-NEXT: shlq $4, %rdi +; ATOM-NEXT: fldt (%rdi,%rax) ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test7: