Index: lib/Target/X86/Utils/X86ShuffleDecode.cpp
===================================================================
--- lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -255,15 +255,13 @@
 
 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                           SmallVectorImpl<int> &ShuffleMask) {
-  if (Imm & 0x88)
-    return; // Not a shuffle
-
   unsigned HalfSize = VT.getVectorNumElements() / 2;
 
   for (unsigned l = 0; l != 2; ++l) {
-    unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
+    unsigned HalfMask = Imm >> (l * 4);
+    unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
     for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
-      ShuffleMask.push_back(i);
+      ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : i);
   }
 }
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4322,6 +4322,7 @@
 /// IsUnary to true if only uses one source. Note that this will set IsUnary for
 /// shuffles which use a single input multiple times, and in those cases it will
 /// adjust the mask to only have indices within that single input.
+/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
 static bool getTargetShuffleMask(SDNode *N, MVT VT,
                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
   unsigned NumElems = VT.getVectorNumElements();
@@ -4451,6 +4452,9 @@
     ImmN = N->getOperand(N->getNumOperands()-1);
     DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
     if (Mask.empty()) return false;
+    // The mask only contains negative (SM_SentinelZero) indices if an element is zeroed.
+    if (std::any_of(Mask.begin(), Mask.end(), [](int M){ return M < 0; }))
+      return false;
     break;
   case X86ISD::MOVSLDUP:
     DecodeMOVSLDUPMask(VT, Mask);
Index: test/CodeGen/X86/avx-vperm2x128.ll
===================================================================
--- test/CodeGen/X86/avx-vperm2x128.ll
+++ test/CodeGen/X86/avx-vperm2x128.ll
@@ -269,7 +269,7 @@
 define <4 x double> @vperm2z_0x08(<4 x double> %a) {
 ; ALL-LABEL: vperm2z_0x08:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x double> %s
@@ -279,7 +279,7 @@
 ; ALL-LABEL: vperm2z_0x18:
 ; ALL:       # BB#0:
 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
-; ALL-NEXT:    vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x double> %s
@@ -288,7 +288,7 @@
 define <4 x double> @vperm2z_0x28(<4 x double> %a) {
 ; ALL-LABEL: vperm2z_0x28:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x double> %s
@@ -298,7 +298,7 @@
 ; ALL-LABEL: vperm2z_0x38:
 ; ALL:       # BB#0:
 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
-; ALL-NEXT:    vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   ret <4 x double> %s
@@ -307,7 +307,7 @@
 define <4 x double> @vperm2z_0x80(<4 x double> %a) {
 ; ALL-LABEL: vperm2z_0x80:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x double> %s
@@ -316,7 +316,7 @@
 define <4 x double> @vperm2z_0x81(<4 x double> %a) {
 ; ALL-LABEL: vperm2z_0x81:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
   ret <4 x double> %s
@@ -325,7 +325,7 @@
 define <4 x double> @vperm2z_0x82(<4 x double> %a) {
 ; ALL-LABEL: vperm2z_0x82:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x double> %s
@@ -334,7 +334,7 @@
 define <4 x double> @vperm2z_0x83(<4 x double> %a) {
 ; ALL-LABEL: vperm2z_0x83:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
   ret <4 x double> %s
@@ -345,8 +345,8 @@
 define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
 ; ALL-LABEL: vperm2z_int_0x83:
 ; ALL:       # BB#0:
-; AVX1:    vperm2f128 $129, %ymm0, %ymm0, %ymm0
-; AVX2:    vperm2i128 $129, %ymm0, %ymm0, %ymm0
+; AVX1:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX2:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
   %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
   %c = add <4 x i64> %b, %s
   ret <4 x i64> %c