diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49487,6 +49487,39 @@
   return SDValue();
 }
 
+/// Combine for X86ISD::PDEP nodes.
+///
+/// When the second operand (the mask) is a constant, only the low
+/// popcount(mask) bits of the first operand are read, so the first operand is
+/// simplified against that demanded-bits mask.
+///
+/// \returns SDValue(N, 0) if the first operand was simplified, otherwise an
+/// empty SDValue.
+static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
+                           TargetLowering::DAGCombinerInfo &DCI) {
+  MVT VT = N->getSimpleValueType(0);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+
+  // If the second operand is a constant, we can use the number of ones in it
+  // to determine the number of low bits read from the other input.
+  if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
+    uint64_t Count = C->getAPIntValue().countPopulation();
+
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    // Only the low Count bits of Op0 are demanded.
+    APInt DemandedMask(APInt::getLowBitsSet(VT.getSizeInBits(), Count));
+    if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI)) {
+      // Queue N for another visit unless it has been marked deleted.
+      if (N->getOpcode() != ISD::DELETED_NODE)
+        DCI.AddToWorklist(N);
+      return SDValue(N, 0);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -49657,6 +49690,7 @@
   case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget);
   case X86ISD::VBROADCAST_LOAD: return combineVBROADCAST_LOAD(N, DAG, DCI);
   case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG);
+  case X86ISD::PDEP: return combinePDEP(N, DAG, DCI);
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/bmi2-x86_64.ll b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
--- a/llvm/test/CodeGen/X86/bmi2-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
@@ -44,9 +44,9 @@
 define i64 @pdep64_anyext(i32 %x) {
 ; CHECK-LABEL: pdep64_anyext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movslq %edi, %rax
-; CHECK-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; CHECK-NEXT:    pdepq %rcx, %rax, %rax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
+; CHECK-NEXT:    pdepq %rax, %rdi, %rax
 ; CHECK-NEXT:    retq
   %x1 = sext i32 %x to i64
   %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x1, i64 6148914691236517205)
diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll
--- a/llvm/test/CodeGen/X86/bmi2.ll
+++ b/llvm/test/CodeGen/X86/bmi2.ll
@@ -86,9 +86,8 @@
 ;
 ; X64-LABEL: pdep32_anyext:
 ; X64:       # %bb.0:
-; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    movl $-1431655766, %ecx # imm = 0xAAAAAAAA
-; X64-NEXT:    pdepl %ecx, %eax, %eax
+; X64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X64-NEXT:    pdepl %eax, %edi, %eax
 ; X64-NEXT:    retq
   %x1 = sext i16 %x to i32
   %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)