Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -26409,8 +26409,7 @@
 
   // look for psign/blend
   if (VT == MVT::v2i64 || VT == MVT::v4i64) {
-    if (!Subtarget.hasSSSE3() ||
-        (VT == MVT::v4i64 && !Subtarget.hasInt256()))
+    if (VT == MVT::v4i64 && !Subtarget.hasInt256())
       return SDValue();
 
     // Canonicalize pandn to RHS
@@ -26446,11 +26445,14 @@
     // there is no psrai.b
     unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
     unsigned SraAmt = ~0;
+    unsigned SRLOpc;
     if (Mask.getOpcode() == ISD::SRA) {
+      SRLOpc = ISD::SRL;
       if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
         if (auto *AmtConst = AmtBV->getConstantSplatNode())
           SraAmt = AmtConst->getZExtValue();
     } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+      SRLOpc = X86ISD::VSRLI;
       SDValue SraC = Mask.getOperand(1);
       SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
     }
@@ -26459,15 +26461,30 @@
 
     SDLoc DL(N);
 
-    // Now we know we at least have a plendvb with the mask val.  See if
-    // we can form a psignb/w/d.
-    // psign = x.type == y.type == mask.type && y = sub(0, x);
+    // Try to match:
+    //   (or (and (M, (sub 0, X)), (pandn M, X)))
+    // which is a special case of vselect:
+    //   (vselect M, (sub 0, X), X)
     if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
         ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
         X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
       assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
              "Unsupported VT for PSIGN");
-      Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
+      // Per:
+      //   http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+      // We know that, if fNegate is 0 or 1:
+      //   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+      //
+      // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+      //   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+      //   (  M     ? -X : X) == ((X ^  M      ) + (M & 1))
+      // This lets us transform our vselect to:
+      //   (add (xor X, M), (and M, 1))
+      // And further to:
+      //   (add (xor X, M), (srl M, EltBits-1))
+      Mask = DAG.getNode(
+          ISD::ADD, DL, MaskVT, DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask),
+          DAG.getNode(SRLOpc, DL, MaskVT, Mask, Mask.getOperand(1)));
       return DAG.getBitcast(VT, Mask);
     }
     // PBLENDVB only available on SSE 4.1
Index: test/CodeGen/X86/avx2-logic.ll
===================================================================
--- test/CodeGen/X86/avx2-logic.ll
+++ test/CodeGen/X86/avx2-logic.ll
@@ -72,7 +72,10 @@
 define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; CHECK-LABEL: signd:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    vpsignd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrad $31, %ymm1, %ymm1
+; CHECK-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrld $31, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
 entry:
   %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
Index: test/CodeGen/X86/vec-sign.ll
===================================================================
--- test/CodeGen/X86/vec-sign.ll
+++ test/CodeGen/X86/vec-sign.ll
@@ -3,21 +3,13 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
 
 define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
-; SSE2-LABEL: signd:
-; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    psubd %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm0, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: signd:
-; SSE41:       # BB#0: # %entry
-; SSE41-NEXT:    psignd %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; ALL-LABEL: signd:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    psrad $31, %xmm1
+; ALL-NEXT:    pxor %xmm1, %xmm0
+; ALL-NEXT:    psrld $31, %xmm1
+; ALL-NEXT:    paddd %xmm1, %xmm0
+; ALL-NEXT:    retq
 entry:
   %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
   %sub = sub nsw <4 x i32> zeroinitializer, %a
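
Not part of the patch itself: a minimal standalone C++ sketch of the conditional-negate
identity the new comment block relies on. The helper name negateIfMaskSet is made up for
illustration; it mirrors one 32-bit lane of the (add (xor X, M), (srl M, EltBits-1))
sequence the combine now emits, assuming M is a sign-splat mask such as psrad $31 produces.

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Hypothetical helper: conditional negate of one 32-bit lane. m is a
// sign-splat mask, i.e. all ones or all zeros (what vpsrad/psrad $31 yields).
static int32_t negateIfMaskSet(int32_t x, int32_t m) {
  // (m ? -x : x) == ((x ^ m) + (m & 1)), and for a sign-splat mask
  // (m & 1) == ((uint32_t)m >> 31), which is the srl by EltBits-1.
  return (x ^ m) + static_cast<int32_t>(static_cast<uint32_t>(m) >> 31);
}

int main() {
  for (int32_t x : {0, 1, -1, 42, -42, INT32_MIN + 1, INT32_MAX}) {
    assert(negateIfMaskSet(x, 0) == x);   // mask clear: x unchanged
    assert(negateIfMaskSet(x, -1) == -x); // mask set: x negated
  }
  return 0;
}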