Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13734,6 +13734,13 @@
   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
 }
 
+// Checks for a BUILD_VECTOR composed of either all-undefs, or constants
+// possibly mixed with undefs.
+static bool isAnyConstantBuildVector(const SDNode *N) {
+  return N->isUndef() || ISD::isBuildVectorOfConstantSDNodes(N) ||
+         ISD::isBuildVectorOfConstantFPSDNodes(N);
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -13849,14 +13856,33 @@
 
   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+  // This combine is done in the following cases:
+  // 1. Both N0 and N1 are BUILD_VECTORs composed of constants or undefs.
+  // 2. Only one of N0 and N1 is a BUILD_VECTOR composed of constants or
+  //    undefs; combine iff that node is ALL_ZEROS. We prefer not to combine
+  //    a BUILD_VECTOR of all constants to allow efficient materialization of
+  //    constant vectors, but ALL_ZEROS is an exception because zero-extension
+  //    matching seems to rely on having BUILD_VECTOR nodes with zero padding
+  //    between elements. FIXME: Eliminate this exception for ALL_ZEROS
+  //    constant vectors.
+  // 3. Neither N0 nor N1 is composed of only constants.
   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+    bool BothN0N1Const = isAnyConstantBuildVector(N0.getNode()) &&
+                         isAnyConstantBuildVector(N1.getNode());
+    SmallVector<bool, 2> IsConstNotAllZeroes = {
+        isAnyConstantBuildVector(N0.getNode()) &&
+            !ISD::isBuildVectorAllZeros(N0.getNode()),
+        isAnyConstantBuildVector(N1.getNode()) &&
+            !ISD::isBuildVectorAllZeros(N1.getNode())};
     SmallVector<SDValue, 8> Ops;
     for (int M : SVN->getMask()) {
       SDValue Op = DAG.getUNDEF(VT.getScalarType());
       if (M >= 0) {
         int Idx = M % NumElts;
         SDValue &S = (M < (int)NumElts ? N0 : N1);
-        if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
+        bool IsUnsuitableConst = IsConstNotAllZeroes[M < (int)NumElts ? 0 : 1];
+        if (BothN0N1Const || (S.getOpcode() == ISD::BUILD_VECTOR &&
+                              S.hasOneUse() && !IsUnsuitableConst)) {
           Op = S.getOperand(Idx);
         } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
           if (Idx == 0)
Index: test/CodeGen/X86/vector-shuffle-combining.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining.ll
+++ test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2839,36 +2839,26 @@
 define <4 x float> @combine_constant_insertion_v4f32(float %f) {
 ; SSE2-LABEL: combine_constant_insertion_v4f32:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT:    movaps {{.*#+}} xmm1 = <u,4,5,30>
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: combine_constant_insertion_v4f32:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSSE3-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT:    movaps {{.*#+}} xmm1 = <u,4,5,30>
+; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: combine_constant_insertion_v4f32:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: combine_constant_insertion_v4f32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
 ; AVX-NEXT:    retq
   %a0 = insertelement <4 x float> undef, float %f, i32 0
   %ret = shufflevector <4 x float> %a0, <4 x float> <float undef, float 4.0, float 5.0, float 30.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -2878,53 +2868,35 @@
 define <4 x i32> @combine_constant_insertion_v4i32(i32 %f) {
 ; SSE2-LABEL: combine_constant_insertion_v4i32:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    movl $30, %eax
-; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    movl $4, %eax
-; SSE2-NEXT:    movd %eax, %xmm1
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT:    movl $5, %eax
-; SSE2-NEXT:    movd %eax, %xmm2
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    movaps {{.*#+}} xmm0 = <u,4,5,30>
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: combine_constant_insertion_v4i32:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movl $30, %eax
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    movl $4, %eax
-; SSSE3-NEXT:    movd %eax, %xmm1
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSSE3-NEXT:    movl $5, %eax
-; SSSE3-NEXT:    movd %eax, %xmm2
-; SSSE3-NEXT:    movd %edi, %xmm0
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    movd %edi, %xmm1
+; SSSE3-NEXT:    movaps {{.*#+}} xmm0 = <u,4,5,30>
+; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: combine_constant_insertion_v4i32:
 ; SSE41:       # BB#0:
 ; SSE41-NEXT:    movd %edi, %xmm0
-; SSE41-NEXT:    movl $4, %eax
-; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
-; SSE41-NEXT:    movl $5, %eax
-; SSE41-NEXT:    pinsrd $2, %eax, %xmm0
-; SSE41-NEXT:    movl $30, %eax
-; SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5,6,7]
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: combine_constant_insertion_v4i32:
-; AVX:       # BB#0:
-; AVX-NEXT:    vmovd %edi, %xmm0
-; AVX-NEXT:    movl $4, %eax
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX-NEXT:    movl $5, %eax
-; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
-; AVX-NEXT:    movl $30, %eax
-; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: combine_constant_insertion_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovd %edi, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_constant_insertion_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovd %edi, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
+; AVX2-NEXT:    retq
   %a0 = insertelement <4 x i32> undef, i32 %f, i32 0
   %ret = shufflevector <4 x i32> %a0, <4 x i32> <i32 undef, i32 4, i32 5, i32 30>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x i32> %ret
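
Note on the case analysis: the per-element decision encoded by BothN0N1Const and IsConstNotAllZeroes reduces to a small predicate over the two shuffle inputs. The standalone C++ sketch below restates cases 1-3; NodeInfo, shouldTakeScalarsFrom, and the driver are hypothetical names for illustration only, and the opcode and hasOneUse() checks that also guard cases 2 and 3 in the real condition are omitted. The patch itself queries SDNodes via isAnyConstantBuildVector() and ISD::isBuildVectorAllZeros().

// Minimal sketch of the combine decision; not the patch's actual code.
#include <cstdio>

// Hypothetical summary of the properties the patch queries on each input.
struct NodeInfo {
  bool IsConstBuildVector; // all-undefs, or constants possibly mixed with undefs
  bool IsAllZeros;         // BUILD_VECTOR of all zero elements
};

// May the combine pull scalar operands out of input Self, given the other
// shuffle input Other? Mirrors `BothN0N1Const || (... && !IsUnsuitableConst)`.
static bool shouldTakeScalarsFrom(const NodeInfo &Self, const NodeInfo &Other) {
  // Case 1: both inputs are constant BUILD_VECTORs - always combine.
  if (Self.IsConstBuildVector && Other.IsConstBuildVector)
    return true;
  // Case 2: Self is the only constant input - combine iff it is ALL_ZEROS,
  // preserving the zero padding that zero-extension matching relies on.
  if (Self.IsConstBuildVector)
    return Self.IsAllZeros;
  // Case 3: Self is not a constant vector - combining cannot lose a cheap
  // constant-pool materialization, so it is allowed.
  return true;
}

int main() {
  NodeInfo NonConst{false, false}, Consts{true, false}, Zeros{true, true};
  std::printf("case 1, const/const:    %d\n", shouldTakeScalarsFrom(Consts, Consts));   // 1
  std::printf("case 2, const/nonconst: %d\n", shouldTakeScalarsFrom(Consts, NonConst)); // 0
  std::printf("case 2, zeros/nonconst: %d\n", shouldTakeScalarsFrom(Zeros, NonConst));  // 1
  std::printf("case 3, nonconst/const: %d\n", shouldTakeScalarsFrom(NonConst, Consts)); // 1
}

The combine_constant_insertion_* tests above exercise case 2's restriction: only N1 is a constant BUILD_VECTOR and it is not all-zeros, so the combine no longer fires, the constant stays a single constant-pool load, and the insertion lowers to one blend instead of a chain of scalar materializations.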