diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34053,6 +34053,17 @@
     }
     break;
   }
+  case X86ISD::PDEP: {
+    KnownBits Known2;
+    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    // Zeros are retained from the mask operand, but not ones.
+    Known.One.clearAllBits();
+    // The result will have at least as many trailing zeros as the non-mask
+    // operand since bits can only map to the same or higher bit position.
+    Known.Zero.setLowBits(Known2.countMinTrailingZeros());
+    break;
+  }
   case X86ISD::VTRUNC:
   case X86ISD::VTRUNCS:
   case X86ISD::VTRUNCUS:
@@ -38373,6 +38384,34 @@
     break;
   }
+  case X86ISD::PDEP: {
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    unsigned DemandedBitsLZ = OriginalDemandedBits.countLeadingZeros();
+    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
+
+    // If the demanded bits have leading zeros, we don't demand those from the
+    // mask.
+    if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1))
+      return true;
+
+    // The number of possible 1s in the mask determines the number of LSBs of
+    // operand 0 used. Undemanded bits from the mask don't matter, so filter
+    // them out before counting.
+    KnownBits Known2;
+    uint64_t Count = (~Known.Zero & LoMask).countPopulation();
+    APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count));
+    if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1))
+      return true;
+
+    // Zeros are retained from the mask, but not ones.
+    Known.One.clearAllBits();
+    // The result will have at least as many trailing zeros as the non-mask
+    // operand since bits can only map to the same or higher bit position.
+    Known.Zero.setLowBits(Known2.countMinTrailingZeros());
+    return false;
+  }
   }
 
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -49580,6 +49619,17 @@
   return SDValue();
 }
 
+static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
+                           TargetLowering::DAGCombinerInfo &DCI) {
+  unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+                               APInt::getAllOnesValue(NumBits), DCI))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -49750,6 +49800,7 @@
   case ISD::FP_ROUND:       return combineFP_ROUND(N, DAG, Subtarget);
   case X86ISD::VBROADCAST_LOAD: return combineVBROADCAST_LOAD(N, DAG, DCI);
   case X86ISD::MOVDQ2Q:     return combineMOVDQ2Q(N, DAG);
+  case X86ISD::PDEP:        return combinePDEP(N, DAG, DCI);
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/bmi2-x86_64.ll b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
--- a/llvm/test/CodeGen/X86/bmi2-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
@@ -44,9 +44,9 @@
 define i64 @pdep64_anyext(i32 %x) {
 ; CHECK-LABEL: pdep64_anyext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movslq %edi, %rax
-; CHECK-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; CHECK-NEXT:    pdepq %rcx, %rax, %rax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
+; CHECK-NEXT:    pdepq %rax, %rdi, %rax
 ; CHECK-NEXT:    retq
   %x1 = sext i32 %x to i64
   %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x1, i64 6148914691236517205)
diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll
--- a/llvm/test/CodeGen/X86/bmi2.ll
+++ b/llvm/test/CodeGen/X86/bmi2.ll
@@ -86,9 +86,9 @@
 ;
 ; X64-LABEL: pdep32_anyext:
 ; X64:       # %bb.0:
-; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    movl $-1431655766, %ecx # imm = 0xAAAAAAAA
-; X64-NEXT:    pdepl %ecx, %eax, %eax
+; X64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X64-NEXT:    pdepl %eax, %edi, %eax
 ; X64-NEXT:    retq
   %x1 = sext i16 %x to i32
   %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
@@ -101,14 +100,12 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
 ; X86-NEXT:    pdepl %ecx, %eax, %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: pdep32_demandedbits:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
 ; X64-NEXT:    pdepl %eax, %edi, %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
 ; X64-NEXT:    retq
   %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
   %tmp2 = and i32 %tmp, 1431655765
@@ -125,8 +122,7 @@
 ;
 ; X64-LABEL: pdep32_demandedbits2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    pdepl %esi, %eax, %eax
+; X64-NEXT:    pdepl %esi, %edi, %eax
 ; X64-NEXT:    andl $128, %eax
 ; X64-NEXT:    retq
   %tmp = and i32 %x, 255
@@ -146,8 +142,7 @@
 ;
 ; X64-LABEL: pdep32_demandedbits_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    movswl %si, %eax
-; X64-NEXT:    pdepl %eax, %edi, %eax
+; X64-NEXT:    pdepl %esi, %edi, %eax
 ; X64-NEXT:    andl $32768, %eax # imm = 0x8000
 ; X64-NEXT:    retq
   %tmp = sext i16 %y to i32
@@ -167,8 +162,7 @@
 ;
 ; X64-LABEL: pdep32_demandedbits_mask2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movswl %si, %eax
-; X64-NEXT:    pdepl %eax, %edi, %eax
+; X64-NEXT:    pdepl %esi, %edi, %eax
 ; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    retq
   %tmp = sext i16 %y to i32
@@ -182,19 +176,15 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    pdepl %ecx, %eax, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    pdepl %ecx, %eax, %eax
+; X86-NEXT:    imull %eax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: pdep32_knownbits:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    pdepl %eax, %edi, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    imull %ecx, %eax
+; X64-NEXT:    pdepl %eax, %edi, %eax
+; X64-NEXT:    imull %eax, %eax
 ; X64-NEXT:    retq
   %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
   %tmp2 = and i32 %tmp, 1431655765
@@ -207,19 +197,15 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl $-256, %eax
 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pdepl {{[0-9]+}}(%esp), %eax, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    andl $-256, %eax
-; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    pdepl {{[0-9]+}}(%esp), %eax, %eax
+; X86-NEXT:    imull %eax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: pdep32_knownbits2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    andl $-256, %edi
-; X64-NEXT:    pdepl %esi, %edi, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    andl $-256, %eax
-; X64-NEXT:    imull %ecx, %eax
+; X64-NEXT:    pdepl %esi, %edi, %eax
+; X64-NEXT:    imull %eax, %eax
 ; X64-NEXT:    retq
   %tmp = and i32 %x, -256
   %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
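
Some context on why the two KnownBits claims in the patch hold. PDEP deposits the low bits of its first operand, LSB first, at the positions of the set bits of its mask operand. So every result bit lies under a set mask bit, which is why the mask's known zeros survive (known ones do not: a set mask bit produces a one only if the corresponding source bit is one), and each source bit can only move to the same or a higher position, which is why the source's trailing zeros survive. A minimal standalone sketch of both facts; `pdep32` is a hypothetical scalar model of the instruction written for this note, not LLVM code:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar model of PDEP: deposit the low bits of Src, LSB
// first, into the positions of the set bits of Mask.
static uint32_t pdep32(uint32_t Src, uint32_t Mask) {
  uint32_t Result = 0;
  for (uint32_t SrcBit = 1; Mask != 0; Mask &= Mask - 1, SrcBit <<= 1)
    if (Src & SrcBit)
      Result |= Mask & -Mask; // lowest set bit remaining in the mask
  return Result;
}

int main() {
  // Zeros of the mask are zeros of the result (the Known.Zero claim in
  // computeKnownBits): no result bit falls outside the mask.
  assert((pdep32(0xDEADBEEF, 0x55555555) & ~0x55555555u) == 0);
  // Trailing zeros of the non-mask operand survive (the setLowBits
  // claim): the source has 4 trailing zeros, the result 0xF00 has 8.
  assert(pdep32(0xF0, 0x0F0F0F0F) == 0xF00);
  return 0;
}
```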
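The movslq/movswl removals in the anyext tests fall out of the demanded-LSBs reasoning: with a 16-set-bit mask such as 0xAAAAAAAA, Count is at most 16, so bits 16..31 of the source are never demanded and the sign extension is dead. A sketch of that equivalence under the same hypothetical pdep32 model (repeated so the block stands alone):

```cpp
#include <cassert>
#include <cstdint>

// Same hypothetical scalar model of PDEP as in the previous sketch.
static uint32_t pdep32(uint32_t Src, uint32_t Mask) {
  uint32_t Result = 0;
  for (uint32_t SrcBit = 1; Mask != 0; Mask &= Mask - 1, SrcBit <<= 1)
    if (Src & SrcBit)
      Result |= Mask & -Mask;
  return Result;
}

int main() {
  // 0xAAAAAAAA has 16 set bits, so at most source bits 0..15 are ever
  // consumed; anything above bit 15 is undemanded.
  uint32_t Mask = 0xAAAAAAAAu;
  int16_t X = -12345;
  uint32_t SignExt = (uint32_t)(int32_t)X; // what the old movswl produced
  uint32_t ZeroExt = (uint16_t)X;          // any extension agrees on bits 0..15
  assert(pdep32(SignExt, Mask) == pdep32(ZeroExt, Mask));
  return 0;
}
```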
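pdep32_demandedbits2 exercises the other half of the new SimplifyDemandedBits case: the user only demands bit 7 of the result (the andl $128), so LoMask restricts demand to mask bits 0..7, Count is at most 8, and the movzbl that cleared source bits 8..31 is redundant. The same model again, checking that equality for a few masks:

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Same hypothetical scalar model of PDEP as in the sketches above.
static uint32_t pdep32(uint32_t Src, uint32_t Mask) {
  uint32_t Result = 0;
  for (uint32_t SrcBit = 1; Mask != 0; Mask &= Mask - 1, SrcBit <<= 1)
    if (Src & SrcBit)
      Result |= Mask & -Mask;
  return Result;
}

int main() {
  // Result bit 7 depends only on mask bits 0..7 and, through the
  // popcount bound, on at most source bits 0..7, so masking the source
  // with 255 can never change the demanded bit.
  uint32_t X = 0x12345678u;
  for (uint32_t Mask : {0xFFu, 0x80u, 0xAB12u, 0xFFFFFFFFu})
    assert((pdep32(X, Mask) & 128u) == (pdep32(X & 255u, Mask) & 128u));
  return 0;
}
```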