Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -2735,6 +2735,16 @@ const SelectionDAG &DAG, unsigned Depth = 0) const; + /// This method can be implemented by targets that want to perform demanded + /// bits simplifications on and across target nodes. If target implements + /// this they must always compute Known or defer back to the default + /// implementation. + virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, + const APInt &DemandedMask, + KnownBits &Known, + TargetLoweringOpt &TLO, + unsigned Depth) const; + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. CombineLevel Level; Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1256,6 +1256,14 @@ LLVM_FALLTHROUGH; } default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + if (SimplifyDemandedBitsForTargetNode(Op, NewMask, Known, TLO, Depth)) + return true; + + // Expect Known handled by above. + break; + } + // Just use computeKnownBits to compute output bits. TLO.DAG.computeKnownBits(Op, Known, Depth); break; @@ -1323,6 +1331,23 @@ return 1; } +bool +TargetLowering::SimplifyDemandedBitsForTargetNode(SDValue Op, + const APInt &DemandedMask, + KnownBits &Known, + TargetLoweringOpt &TLO, + unsigned Depth) const { + assert(Op.getOpcode() >= ISD::BUILTIN_OP_END && + "Should use SimplifyDemandedBits if you don't know whether Op" + " is a target node!"); + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? 
APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); + computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -852,6 +852,13 @@ const SelectionDAG &DAG, unsigned Depth) const override; + // Simplify demanded bits for target specific nodes. + bool SimplifyDemandedBitsForTargetNode(SDValue Op, + const APInt &DemandedMask, + KnownBits &Known, + TargetLoweringOpt &TLO, + unsigned Depth) const override; + SDValue unwrapAddress(SDValue N) const override; bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -28199,6 +28199,78 @@ return 1; } +bool +X86TargetLowering::SimplifyDemandedBitsForTargetNode(SDValue Op, + const APInt &DemandedMask, + KnownBits &Known, + TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned Opc = Op.getOpcode(); + assert((Opc >= ISD::BUILTIN_OP_END || + Opc == ISD::INTRINSIC_WO_CHAIN || + Opc == ISD::INTRINSIC_W_CHAIN || + Opc == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + + switch (Opc) { + case X86ISD::CMOV: { + // Try to shrink constants feeding the CMOV. + // TODO: Should we recursively simplify other operands. + if (auto *Op0C = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + const APInt &C = Op0C->getAPIntValue(); + if (!C.isSubsetOf(DemandedMask)) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + // Make sure we don't mess with 64-bit constants unless they fit + // a sign extended 32-bit value. 
+ APInt NC = DemandedMask & C; + if (C.getMinSignedBits() > 32 || NC.getMinSignedBits() <= 32) { + SDValue NewC = TLO.DAG.getConstant(NC, DL, VT); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, VT, NewC, + Op.getOperand(1), Op.getOperand(2), + Op.getOperand(3)); + return TLO.CombineTo(Op, NewOp); + } + } + } + + if (auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &C = Op1C->getAPIntValue(); + if (!C.isSubsetOf(DemandedMask)) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + // Make sure we don't mess with 64-bit constants unless they fit + // a sign extended 32-bit value. + APInt NC = DemandedMask & C; + if (C.getMinSignedBits() > 32 || NC.getMinSignedBits() <= 32) { + SDValue NewC = TLO.DAG.getConstant(NC, DL, VT); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, VT, + Op.getOperand(0), NewC, + Op.getOperand(2), Op.getOperand(3)); + return TLO.CombineTo(Op, NewOp); + } + } + } + + // Fall back to computing known bits. + TLO.DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1); + // If we don't know any bits, early out. 
+ if (Known.isUnknown()) + return false; + KnownBits Known2; + TLO.DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1); + + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + return false; + } + } + + return TargetLowering::SimplifyDemandedBitsForTargetNode(Op, DemandedMask, + Known, TLO, Depth); +} + SDValue X86TargetLowering::unwrapAddress(SDValue N) const { if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP) return N->getOperand(0); Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -43,7 +43,7 @@ ; GENERIC-NEXT: callq _return_false ; GENERIC-NEXT: xorl %ecx, %ecx ; GENERIC-NEXT: testb $1, %al -; GENERIC-NEXT: movl $-480, %eax ## imm = 0xFE20 +; GENERIC-NEXT: movl $536870432, %eax ## imm = 0x1FFFFE20 ; GENERIC-NEXT: cmovnel %ecx, %eax ; GENERIC-NEXT: shll $3, %eax ; GENERIC-NEXT: cmpl $32768, %eax ## imm = 0x8000 @@ -59,7 +59,7 @@ ; ATOM-NEXT: pushq %rax ; ATOM-NEXT: callq _return_false ; ATOM-NEXT: xorl %ecx, %ecx -; ATOM-NEXT: movl $-480, %edx ## imm = 0xFE20 +; ATOM-NEXT: movl $536870432, %edx ## imm = 0x1FFFFE20 ; ATOM-NEXT: testb $1, %al ; ATOM-NEXT: cmovnel %ecx, %edx ; ATOM-NEXT: shll $3, %edx @@ -78,7 +78,7 @@ ; MCU-NEXT: testb $1, %al ; MCU-NEXT: jne .LBB1_2 ; MCU-NEXT: # %bb.1: # %entry -; MCU-NEXT: movl $-480, %ecx # imm = 0xFE20 +; MCU-NEXT: movl $536870432, %ecx # imm = 0x1FFFFE20 ; MCU-NEXT: .LBB1_2: # %entry ; MCU-NEXT: shll $3, %ecx ; MCU-NEXT: cmpl $32768, %ecx # imm = 0x8000 Index: test/CodeGen/X86/select_const.ll =================================================================== --- test/CodeGen/X86/select_const.ll +++ test/CodeGen/X86/select_const.ll @@ -484,7 +484,7 @@ ; CHECK-LABEL: opaque_constant: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: movl $23, %ecx +; CHECK-NEXT: movl $1, %ecx ; CHECK-NEXT: movq $-4, %rax ; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: movabsq 
$4294967297, %rcx # imm = 0x100000001 Index: test/CodeGen/X86/vector-compare-all_of.ll =================================================================== --- test/CodeGen/X86/vector-compare-all_of.ll +++ test/CodeGen/X86/vector-compare-all_of.ll @@ -603,22 +603,20 @@ ; SSE-LABEL: test_v8i16_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: cmovel %ecx, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i16_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: xorl %ecx, %ecx -; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX-NEXT: movl $-1, %eax -; AVX-NEXT: cmovnel %ecx, %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; AVX-NEXT: cmovel %ecx, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; @@ -652,11 +650,10 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: cmovel %ecx, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; @@ -725,11 +722,10 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: 
pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: cmovel %ecx, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; @@ -740,11 +736,10 @@ ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: xorl %ecx, %ecx -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX1-NEXT: movl $-1, %eax -; AVX1-NEXT: cmovnel %ecx, %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; AVX1-NEXT: cmovel %ecx, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -754,11 +749,10 @@ ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: xorl %ecx, %ecx -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX2-NEXT: movl $-1, %eax -; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: vpmovmskb %xmm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; AVX2-NEXT: cmovel %ecx, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -802,7 +796,7 @@ ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax +; SSE-NEXT: movl $255, %eax ; SSE-NEXT: cmovnel %ecx, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -813,7 +807,7 @@ ; AVX-NEXT: vpmovmskb %xmm0, %eax ; AVX-NEXT: xorl %ecx, %ecx ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX-NEXT: movl $-1, %eax +; AVX-NEXT: movl $255, %eax ; AVX-NEXT: cmovnel %ecx, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq @@ -855,7 +849,7 @@ ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax 
# imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax +; SSE-NEXT: movl $255, %eax ; SSE-NEXT: cmovnel %ecx, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq