Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -793,7 +793,7 @@
 
     APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
     // If we only care about the highest bit, don't bother shifting right.
-    if (MsbMask == DemandedMask) {
+    if (MsbMask == NewMask) {
       unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
       SDValue InOp = Op.getOperand(0);
 
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2131,16 +2131,6 @@
   case X86ISD::GlobalBaseReg:
     return getGlobalBaseReg();
 
-  case X86ISD::SHRUNKBLEND: {
-    // SHRUNKBLEND selects like a regular VSELECT.
-    SDValue VSelect = CurDAG->getNode(
-        ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
-        Node->getOperand(1), Node->getOperand(2));
-    ReplaceUses(SDValue(Node, 0), VSelect);
-    SelectCode(VSelect.getNode());
-    // We already called ReplaceUses.
-    return nullptr;
-  }
 
   case ISD::ATOMIC_LOAD_XOR:
   case ISD::ATOMIC_LOAD_AND:
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -190,11 +190,6 @@
       /// BLENDI - Blend where the selector is an immediate.
       BLENDI,
 
-      /// SHRUNKBLEND - Blend where the condition has been shrunk.
-      /// This is used to emphasize that the condition mask is
-      /// no more valid for generic VSELECT optimizations.
-      SHRUNKBLEND,
-
       /// ADDSUB - Combined add and sub on an FP vector.
       ADDSUB,
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -19910,7 +19910,6 @@
   case X86ISD::ANDNP:              return "X86ISD::ANDNP";
   case X86ISD::PSIGN:              return "X86ISD::PSIGN";
   case X86ISD::BLENDI:             return "X86ISD::BLENDI";
-  case X86ISD::SHRUNKBLEND:        return "X86ISD::SHRUNKBLEND";
   case X86ISD::SUBUS:              return "X86ISD::SUBUS";
   case X86ISD::HADD:               return "X86ISD::HADD";
   case X86ISD::HSUB:               return "X86ISD::HSUB";
@@ -23525,17 +23524,22 @@
       // build_vector of constants. This will be taken care in a later
       // condition.
       (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
-       VT != MVT::v8i16) &&
-      // Don't optimize vector of constants. Those are handled by
-      // the generic code and all the bits must be properly set for
-      // the generic optimizer.
-      !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
+       VT != MVT::v8i16)) {
     unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
 
     // Don't optimize vector selects that map to mask-registers.
     if (BitWidth == 1)
       return SDValue();
 
+    // Check all uses of the condition operand to see whether it will be
+    // consumed by non-BLEND instructions, which may depend on all bits being
+    // set properly.
+    for (SDNode::use_iterator I = Cond->use_begin(),
+                              E = Cond->use_end(); I != E; ++I)
+      if (I->getOpcode() != ISD::VSELECT)
+        // TODO: Add other opcodes eventually lowered into BLEND.
+        return SDValue();
+
     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
     APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
 
@@ -23543,45 +23547,13 @@
     TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
                                           DCI.isBeforeLegalizeOps());
     if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
-        TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
-                                 TLO)) {
-      // If we changed the computation somewhere in the DAG, this change
-      // will affect all users of Cond.
-      // Make sure it is fine and update all the nodes so that we do not
-      // use the generic VSELECT anymore. Otherwise, we may perform
-      // wrong optimizations as we messed up with the actual expectation
-      // for the vector boolean values.
-      if (Cond != TLO.Old) {
-        // Check all uses of that condition operand to check whether it will be
-        // consumed by non-BLEND instructions, which may depend on all bits are
-        // set properly.
-        for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
-             I != E; ++I)
-          if (I->getOpcode() != ISD::VSELECT)
-            // TODO: Add other opcodes eventually lowered into BLEND.
-            return SDValue();
-
-        // Update all the users of the condition, before committing the change,
-        // so that the VSELECT optimizations that expect the correct vector
-        // boolean value will not be triggered.
-        for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
-             I != E; ++I)
-          DAG.ReplaceAllUsesOfValueWith(
-              SDValue(*I, 0),
-              DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0),
-                          Cond, I->getOperand(1), I->getOperand(2)));
-        DCI.CommitTargetLoweringOpt(TLO);
-        return SDValue();
-      }
-      // At this point, only Cond is changed. Change the condition
-      // just for N to keep the opportunity to optimize all other
-      // users their own way.
-      DAG.ReplaceAllUsesOfValueWith(
-          SDValue(N, 0),
-          DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0),
-                      TLO.New, N->getOperand(1), N->getOperand(2)));
-      return SDValue();
-    }
+        (TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
+                                  TLO) &&
+         // Don't optimize a vector of constants. Those are handled by
+         // the generic code, and all the bits must be properly set for
+         // the generic optimizer.
+         !ISD::isBuildVectorOfConstantSDNodes(TLO.New.getNode())))
+      DCI.CommitTargetLoweringOpt(TLO);
   }
 
   // We should generate an X86ISD::BLENDI from a vselect if its argument
@@ -23595,9 +23567,7 @@
   // Iff we find this pattern and the build_vectors are built from
   // constants, we translate the vselect into a shuffle_vector that we
   // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
-  if ((N->getOpcode() == ISD::VSELECT ||
-       N->getOpcode() == X86ISD::SHRUNKBLEND) &&
-      !DCI.isBeforeLegalize()) {
+  if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
     SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
     if (Shuffle.getNode())
       return Shuffle;
@@ -25785,9 +25755,7 @@
   case ISD::EXTRACT_VECTOR_ELT:
     return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
   case ISD::VSELECT:
-  case ISD::SELECT:
-  case X86ISD::SHRUNKBLEND:
-    return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, DCI, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
   case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
   case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
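
With SHRUNKBLEND gone, the restored combine only shrinks the blend condition when
every user of that condition is a VSELECT, and it declines to commit a
SimplifyDemandedBits change whose result is a build_vector of constants; any other
consumer of the mask may rely on bits below the sign bit. A standalone toy sketch
of that distinction, modelling a single 32-bit lane in plain C++ with made-up
helper names (the real consumers are the vblendvps/vpand pair exercised by the
vselect-avx.ll test below):

    // Toy model of one lane: a blendv-style user reads only the sign bit of
    // the mask, so a mask whose low bits are wrong is still fine for it,
    // while an AND-style user reads every bit and observes the difference.
    #include <cstdint>
    #include <cstdio>

    // vblendvps-like select: picks B when the sign bit of M is set.
    static uint32_t blendLane(uint32_t M, uint32_t A, uint32_t B) {
      return (M & 0x80000000u) ? B : A;
    }

    int main() {
      const uint32_t FullMask   = 0xFFFFFFFFu; // all-ones lane from a compare
      const uint32_t ShrunkMask = 0x80000000u; // only the sign bit kept correct

      // The blend user cannot tell the two masks apart...
      std::printf("blend(full)   = %#x\n", blendLane(FullMask, 0x11u, 0x22u));
      std::printf("blend(shrunk) = %#x\n", blendLane(ShrunkMask, 0x11u, 0x22u));

      // ...but an AND user (like the vpand in the test below) can.
      std::printf("and(full)   = %#x\n", FullMask & 0xFFu);
      std::printf("and(shrunk) = %#x\n", ShrunkMask & 0xFFu);
      return 0;
    }
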
Index: test/CodeGen/X86/vector-blend.ll
===================================================================
--- test/CodeGen/X86/vector-blend.ll
+++ test/CodeGen/X86/vector-blend.ll
@@ -419,8 +419,8 @@
 ;
 ; SSE41-LABEL: vsel_i648:
 ; SSE41:       # BB#0: # %entry
-; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
 ; SSE41-NEXT:    movaps %xmm5, %xmm1
 ; SSE41-NEXT:    movaps %xmm7, %xmm3
 ; SSE41-NEXT:    retq
Index: test/CodeGen/X86/vselect-avx.ll
===================================================================
--- test/CodeGen/X86/vselect-avx.ll
+++ test/CodeGen/X86/vselect-avx.ll
@@ -59,19 +59,15 @@
 ; 
 ; <rdar://problem/18819506>
 
-; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we
-; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise,
-; even a faulty pattern would pass!
-;  
 ; CHECK-LABEL: test3:
-; Compute the original mask.
-;	CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]]
-; Shrink the bit of the mask.
-; CHECK-NEXT: vpslld	$31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]]
-; Use the shrunk mask in the blend.
-; CHECK-NEXT:	vblendvps	[[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; Use the original mask in the and.
-; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}} 
+; Compute the mask.
+;	CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
+; Do not shrink the mask down to its sign bit.
+; CHECK-NOT: vpslld	$31, [[MASK]], {{%xmm[0-9]+}}
+; Use the mask in the blend.
+; CHECK-NEXT:	vblendvps	[[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; Use the mask in the and.
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}} 
 ; CHECK: retq
 define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17,  <4 x i16> %tmp3, <4 x i16> %tmp12) {
   %tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>