Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3013,48 +3013,55 @@
   }
 
   // Normalize the shuffle vector since mask and vector length don't match.
-  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
-    // Mask is longer than the source vectors and is a multiple of the source
-    // vectors.  We can use concatenate vector to make the mask and vectors
-    // lengths match.
-
-    unsigned NumConcat = MaskNumElts / SrcNumElts;
-
-    // Check if the shuffle is some kind of concatenation of the input vectors.
-    bool IsConcat = true;
-    SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
-    for (unsigned i = 0; i != MaskNumElts; ++i) {
-      int Idx = Mask[i];
-      if (Idx < 0)
-        continue;
-      // Ensure the indices in each SrcVT sized piece are sequential and that
-      // the same source is used for the whole piece.
-      if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
-          (ConcatSrcs[i / SrcNumElts] >= 0 &&
-           ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
-        IsConcat = false;
-        break;
+  if (SrcNumElts < MaskNumElts) {
+    // Mask is longer than the source vectors. We can use concatenate vector to
+    // make the mask and vectors lengths match.
+
+    if (MaskNumElts % SrcNumElts == 0) {
+      // Mask length is a multiple of the source vector length.
+      // Check if the shuffle is some kind of concatenation of the input
+      // vectors.
+      unsigned NumConcat = MaskNumElts / SrcNumElts;
+      bool IsConcat = true;
+      SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          continue;
+        // Ensure the indices in each SrcVT sized piece are sequential and that
+        // the same source is used for the whole piece.
+        if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+            (ConcatSrcs[i / SrcNumElts] >= 0 &&
+             ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+          IsConcat = false;
+          break;
+        }
+        // Remember which source this index came from.
+        ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
       }
-      // Remember which source this index came from.
-      ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
-    }
 
-    // The shuffle is concatenating multiple vectors together. Just emit
-    // a CONCAT_VECTORS operation.
-    if (IsConcat) {
-      SmallVector<SDValue, 8> ConcatOps;
-      for (auto Src : ConcatSrcs) {
-        if (Src < 0)
-          ConcatOps.push_back(DAG.getUNDEF(SrcVT));
-        else if (Src == 0)
-          ConcatOps.push_back(Src1);
-        else
-          ConcatOps.push_back(Src2);
+      // The shuffle is concatenating multiple vectors together. Just emit
+      // a CONCAT_VECTORS operation.
+      if (IsConcat) {
+        SmallVector<SDValue, 8> ConcatOps;
+        for (auto Src : ConcatSrcs) {
+          if (Src < 0)
+            ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+          else if (Src == 0)
+            ConcatOps.push_back(Src1);
+          else
+            ConcatOps.push_back(Src2);
+        }
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
+        return;
       }
-      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
-      return;
     }
 
+    unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+    unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+    EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+                                    PaddedMaskNumElts);
+
     // Pad both vectors with undefs to make them the same length as the mask.
     SDValue UndefVal = DAG.getUNDEF(SrcVT);
 
@@ -3063,10 +3070,12 @@
     MOps1[0] = Src1;
     MOps2[0] = Src2;
 
-    Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
-                          : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MOps1);
-    Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
-                          : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MOps2);
+    Src1 = Src1.isUndef()
+               ? DAG.getUNDEF(PaddedVT)
+               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+    Src2 = Src2.isUndef()
+               ? DAG.getUNDEF(PaddedVT)
+               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
 
     // Readjust mask for new input vector length.
     SmallVector<int, 8> MappedOps;
@@ -3077,7 +3086,19 @@
       MappedOps.push_back(Idx);
     }
 
-    setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
+    for (unsigned i = MaskNumElts; i != PaddedMaskNumElts; ++i)
+      MappedOps.push_back(-1);
+
+    SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
+
+    // If the concatenated vector was padded, extract a subvector with the correct
+    // number of elements.
+    if (MaskNumElts != PaddedMaskNumElts)
+      Result = DAG.getNode(
+          ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+    setValue(&I, Result);
     return;
   }
 
Index: test/CodeGen/X86/pr29025.ll
===================================================================
--- test/CodeGen/X86/pr29025.ll
+++ test/CodeGen/X86/pr29025.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-pc-linux -mattr=+avx < %s | FileCheck %s
+
+define void @foo(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <12 x i8> *%p) {
+; CHECK-LABEL: foo:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm1 = zero,xmm1[0,u],zero,xmm1[4,u],zero,xmm1[8,u],zero,xmm1[12,u,u,u,u,u]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[u,4],zero,xmm0[u,8],zero,xmm0[u,12],zero,xmm0[u,u,u,u,u]
+; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpextrd $2, %xmm0, 8(%eax)
+; CHECK-NEXT:    vpextrd $1, %xmm0, 4(%eax)
+; CHECK-NEXT:    vmovd %xmm0, (%eax)
+; CHECK-NEXT:    retl
+  %s1 = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s2 = shufflevector <4 x i8> %c, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %r = shufflevector <8 x i8> %s1, <8 x i8> %s2, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i8> %r, <12 x i8>* %p, align 1
+  ret void
+}