Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -2654,7 +2654,7 @@
                             bool AssumeSingleUse = false) const;
 
   /// Helper wrapper around SimplifyDemandedBits
-  bool SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
                             DAGCombinerInfo &DCI) const;
 
   /// Determine which of the bits specified in Mask are known to be either zero
@@ -2676,6 +2676,16 @@
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth = 0) const;
 
+  /// This method can be implemented by targets that want to perform demanded
+  /// bits simplifications on and across target nodes. If the target
+  /// implements this, it must always compute Known or defer back to the
+  /// default implementation.
+  virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+                                                 const APInt &DemandedMask,
+                                                 KnownBits &Known,
+                                                 TargetLoweringOpt &TLO,
+                                                 unsigned Depth) const;
+
   struct DAGCombinerInfo {
     void *DC;  // The DAG Combiner object.
     CombineLevel Level;
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -479,7 +479,7 @@
   return true;
 }
 
-bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
                                           DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
@@ -1272,6 +1272,14 @@
     LLVM_FALLTHROUGH;
   }
   default:
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+      if (SimplifyDemandedBitsForTargetNode(Op, NewMask, Known, TLO, Depth))
+        return true;
+
+      // Known is expected to have been computed by the hook above.
+      break;
+    }
+
     // Just use computeKnownBits to compute output bits.
     TLO.DAG.computeKnownBits(Op, Known, Depth);
     break;
@@ -1327,6 +1335,23 @@
   return 1;
 }
 
+bool
+TargetLowering::SimplifyDemandedBitsForTargetNode(SDValue Op,
+                                                  const APInt &DemandedMask,
+                                                  KnownBits &Known,
+                                                  TargetLoweringOpt &TLO,
+                                                  unsigned Depth) const {
+  assert(Op.getOpcode() >= ISD::BUILTIN_OP_END &&
+         "Should use SimplifyDemandedBits if you don't know whether Op"
+         " is a target node!");
+  EVT VT = Op.getValueType();
+  APInt DemandedElts = VT.isVector()
+                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
+                           : APInt(1, 1);
+  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
+  return false;
+}
+
 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
 // work with truncating build vectors and vectors with elements of less than
 // 8 bits.
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -830,6 +830,13 @@
                                              const SelectionDAG &DAG,
                                              unsigned Depth) const override;
 
+    // Simplify demanded bits for target-specific nodes.
+    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+                                           const APInt &DemandedMask,
+                                           KnownBits &Known,
+                                           TargetLoweringOpt &TLO,
+                                           unsigned Depth) const override;
+
     SDValue unwrapAddress(SDValue N) const override;
 
     bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27250,6 +27250,78 @@
   return 1;
 }
 
+bool
+X86TargetLowering::SimplifyDemandedBitsForTargetNode(SDValue Op,
+                                                     const APInt &DemandedMask,
+                                                     KnownBits &Known,
+                                                     TargetLoweringOpt &TLO,
+                                                     unsigned Depth) const {
+  unsigned Opc = Op.getOpcode();
+  assert((Opc >= ISD::BUILTIN_OP_END ||
+          Opc == ISD::INTRINSIC_WO_CHAIN ||
+          Opc == ISD::INTRINSIC_W_CHAIN ||
+          Opc == ISD::INTRINSIC_VOID) &&
+         "Should use SimplifyDemandedBits if you don't know whether Op"
+         " is a target node!");
+
+  switch (Opc) {
+  case X86ISD::CMOV: {
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, Known, TLO,
+                             Depth+1))
+      return true;
+    KnownBits Known2;
+    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, Known2, TLO,
+                             Depth+1))
+      return true;
+    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+    if (auto *Op0C = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      const APInt &C = Op0C->getAPIntValue();
+      if (!C.isSubsetOf(DemandedMask)) {
+        SDLoc DL(Op);
+        EVT VT = Op.getValueType();
+        // Make sure we don't mess with 64-bit constants unless they fit
+        // a sign-extended 32-bit value.
+        APInt NC = DemandedMask & C;
+        if (C.getMinSignedBits() > 32 || NC.getMinSignedBits() <= 32) {
+          SDValue NewC = TLO.DAG.getConstant(NC, DL, VT);
+          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, VT, NewC,
+                                          Op.getOperand(1), Op.getOperand(2),
+                                          Op.getOperand(3));
+          return TLO.CombineTo(Op, NewOp);
+        }
+      }
+    }
+
+    if (auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      const APInt &C = Op1C->getAPIntValue();
+      if (!C.isSubsetOf(DemandedMask)) {
+        SDLoc DL(Op);
+        EVT VT = Op.getValueType();
+        // Make sure we don't mess with 64-bit constants unless they fit
+        // a sign-extended 32-bit value.
+        APInt NC = DemandedMask & C;
+        if (C.getMinSignedBits() > 32 || NC.getMinSignedBits() <= 32) {
+          SDValue NewC = TLO.DAG.getConstant(NC, DL, VT);
+          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, VT,
+                                          Op.getOperand(0), NewC,
+                                          Op.getOperand(2), Op.getOperand(3));
+          return TLO.CombineTo(Op, NewOp);
+        }
+      }
+    }
+
+    Known.One &= Known2.One;
+    Known.Zero &= Known2.Zero;
+    return false;
+  }
+  }
+
+  return TargetLowering::SimplifyDemandedBitsForTargetNode(Op, DemandedMask,
+                                                           Known, TLO, Depth);
+}
+
 SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
   if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP)
     return N->getOperand(0);
Index: test/CodeGen/X86/select.ll
===================================================================
--- test/CodeGen/X86/select.ll
+++ test/CodeGen/X86/select.ll
@@ -45,7 +45,7 @@
 ; GENERIC-NEXT:    callq _return_false
 ; GENERIC-NEXT:    xorl %ecx, %ecx
 ; GENERIC-NEXT:    testb $1, %al
-; GENERIC-NEXT:    movl $-480, %eax
+; GENERIC-NEXT:    movl $536870432, %eax ## imm = 0x1FFFFE20
 ; GENERIC-NEXT:    cmovnel %ecx, %eax
 ; GENERIC-NEXT:    shll $3, %eax
 ; GENERIC-NEXT:    cmpl $32768, %eax ## imm = 0x8000
@@ -55,14 +55,13 @@
 ; GENERIC-NEXT:    popq %rcx
 ; GENERIC-NEXT:    retq
 ; GENERIC-NEXT:  LBB1_1: ## %bb90
-; GENERIC-NEXT:    ## -- End function
 ;
 ; ATOM-LABEL: test2:
 ; ATOM:       ## BB#0: ## %entry
 ; ATOM-NEXT:    pushq %rax
 ; ATOM-NEXT:    callq _return_false
 ; ATOM-NEXT:    xorl %ecx, %ecx
-; ATOM-NEXT:    movl $-480, %edx
+; ATOM-NEXT:    movl $536870432, %edx ## imm = 0x1FFFFE20
 ; ATOM-NEXT:    testb $1, %al
 ; ATOM-NEXT:    cmovnel %ecx, %edx
 ; ATOM-NEXT:    shll $3, %edx
@@ -73,17 +72,16 @@
 ; ATOM-NEXT:    popq %rcx
 ; ATOM-NEXT:    retq
 ; ATOM-NEXT:  LBB1_1: ## %bb90
-; ATOM-NEXT:    ## -- End function
 ;
 ; MCU-LABEL: test2:
 ; MCU:       # BB#0: # %entry
 ; MCU-NEXT:    calll return_false
-; MCU-NEXT:    xorl %ecx, %ecx
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    testb $1, %al
 ; MCU-NEXT:    jne .LBB1_2
 ; MCU-NEXT:  # BB#1: # %entry
-; MCU-NEXT:    movl $-480, %ecx # imm = 0xFE20
-; MCU-NEXT:  .LBB1_2:
+; MCU-NEXT:    movl $536870432, %ecx # imm = 0x1FFFFE20
+; MCU-NEXT:  .LBB1_2: # %entry
 ; MCU-NEXT:    shll $3, %ecx
 ; MCU-NEXT:    cmpl $32768, %ecx # imm = 0x8000
 ; MCU-NEXT:    jge .LBB1_3
Index: test/CodeGen/X86/select_const.ll
===================================================================
--- test/CodeGen/X86/select_const.ll
+++ test/CodeGen/X86/select_const.ll
@@ -484,7 +484,7 @@
 ; CHECK-LABEL: opaque_constant:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    movl $23, %ecx
+; CHECK-NEXT:    movl $1, %ecx
 ; CHECK-NEXT:    movq $-4, %rax
 ; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    movabsq $4294967297, %rcx # imm = 0x100000001
Index: test/CodeGen/X86/vector-compare-all_of.ll
===================================================================
--- test/CodeGen/X86/vector-compare-all_of.ll
+++ test/CodeGen/X86/vector-compare-all_of.ll
@@ -619,22 +619,20 @@
 ; SSE-LABEL: test_v8i16_sext:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    xorl %ecx, %ecx
-; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; SSE-NEXT:    movl $-1, %eax
-; SSE-NEXT:    cmovnel %ecx, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
+; SSE-NEXT:    cmovel %ecx, %eax
 ; SSE-NEXT:    # kill: %AX %AX %EAX
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v8i16_sext:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    xorl %ecx, %ecx
-; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; AVX-NEXT:    movl $-1, %eax
-; AVX-NEXT:    cmovnel %ecx, %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
+; AVX-NEXT:    cmovel %ecx, %eax
 ; AVX-NEXT:    # kill: %AX %AX %EAX
 ; AVX-NEXT:    retq
 ;
@@ -669,11 +667,10 @@
 ; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
 ; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    xorl %ecx, %ecx
-; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; SSE-NEXT:    movl $-1, %eax
-; SSE-NEXT:    cmovnel %ecx, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
+; SSE-NEXT:    cmovel %ecx, %eax
 ; SSE-NEXT:    # kill: %AX %AX %EAX
 ; SSE-NEXT:    retq
 ;
@@ -743,11 +740,10 @@
 ; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
 ; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    xorl %ecx, %ecx
-; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; SSE-NEXT:    movl $-1, %eax
-; SSE-NEXT:    cmovnel %ecx, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
+; SSE-NEXT:    cmovel %ecx, %eax
 ; SSE-NEXT:    # kill: %AX %AX %EAX
 ; SSE-NEXT:    retq
 ;
@@ -758,11 +754,10 @@
 ; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    xorl %ecx, %ecx
-; AVX1-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; AVX1-NEXT:    movl $-1, %eax
-; AVX1-NEXT:    cmovnel %ecx, %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
+; AVX1-NEXT:    cmovel %ecx, %eax
 ; AVX1-NEXT:    # kill: %AX %AX %EAX
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -772,11 +767,10 @@
 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
-; AVX2-NEXT:    xorl %ecx, %ecx
-; AVX2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; AVX2-NEXT:    movl $-1, %eax
-; AVX2-NEXT:    cmovnel %ecx, %eax
+; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
+; AVX2-NEXT:    cmovel %ecx, %eax
 ; AVX2-NEXT:    # kill: %AX %AX %EAX
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -820,7 +814,7 @@
 ; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    xorl %ecx, %ecx
 ; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; SSE-NEXT:    movl $-1, %eax
+; SSE-NEXT:    movl $255, %eax
 ; SSE-NEXT:    cmovnel %ecx, %eax
 ; SSE-NEXT:    # kill: %AL %AL %EAX
 ; SSE-NEXT:    retq
 ;
@@ -831,7 +825,7 @@
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX-NEXT:    xorl %ecx, %ecx
 ; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; AVX-NEXT:    movl $-1, %eax
+; AVX-NEXT:    movl $255, %eax
 ; AVX-NEXT:    cmovnel %ecx, %eax
 ; AVX-NEXT:    # kill: %AL %AL %EAX
 ; AVX-NEXT:    retq
 ;
@@ -874,7 +868,7 @@
 ; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    xorl %ecx, %ecx
 ; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
-; SSE-NEXT:    movl $-1, %eax
+; SSE-NEXT:    movl $255, %eax
 ; SSE-NEXT:    cmovnel %ecx, %eax
 ; SSE-NEXT:    # kill: %AL %AL %EAX
 ; SSE-NEXT:    retq
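
For illustration, here is a minimal sketch of how another target could adopt the new hook; it is not part of the patch, and MyTargetLowering and MyTargetISD::PASSTHRU are hypothetical names for a target class with a bit-preserving node. Unhandled opcodes defer to the TargetLowering default above, which computes Known via computeKnownBitsForTargetNode and performs no simplification.

  bool MyTargetLowering::SimplifyDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedMask, KnownBits &Known,
      TargetLoweringOpt &TLO, unsigned Depth) const {
    switch (Op.getOpcode()) {
    case MyTargetISD::PASSTHRU:
      // The node forwards its operand's bits unchanged, so the demanded mask
      // propagates straight through. If the recursive call simplifies the
      // operand, it returns true with the replacement recorded in TLO;
      // otherwise it leaves Known populated, satisfying the hook's contract
      // of always computing Known.
      return SimplifyDemandedBits(Op.getOperand(0), DemandedMask, Known, TLO,
                                  Depth + 1);
    }
    return TargetLowering::SimplifyDemandedBitsForTargetNode(Op, DemandedMask,
                                                             Known, TLO, Depth);
  }

The select.ll diff above shows the X86ISD::CMOV case in action: the cmov result feeds shll $3, so only the low 29 bits of the constant operand are demanded (0xFFFFFFFF lshr 3 = 0x1FFFFFFF), and -480 (0xFFFFFE20) is shrunk to 0xFFFFFE20 & 0x1FFFFFFF = 0x1FFFFE20, which is the new $536870432 immediate in the checks.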