Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14947,6 +14947,29 @@
   if (ISD::allOperandsUndef(N))
     return DAG.getUNDEF(VT);
 
+  // If this is a splat of a bitcast from another vector, change to a
+  // concat_vector.
+  // For example:
+  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
+  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
+  //
+  // If X is a build_vector itself, the concat can become a larger build_vector.
+  // TODO: Maybe this is useful for non-splat too?
+  if (!LegalOperations) {
+    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+      Splat = peekThroughBitcast(Splat);
+      EVT SrcVT = Splat.getValueType();
+      if (SrcVT.isVector()) {
+        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
+        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
+                                     SrcVT.getVectorElementType(), NumElts);
+        SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
+        SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
+        return DAG.getBitcast(VT, Concat);
+      }
+    }
+  }
+
   // Check if we can express BUILD VECTOR via subvector extract.
   if (!LegalTypes && (N->getNumOperands() > 1)) {
     SDValue Op0 = N->getOperand(0);
Index: llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll
+++ llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll
@@ -97,17 +97,9 @@
 define <8 x i64> @insert_subvector_into_undef(i32 %x0, i32 %x1) nounwind {
 ; X32_AVX256-LABEL: insert_subvector_into_undef:
 ; X32_AVX256:       # %bb.0:
-; X32_AVX256-NEXT:    pushl %ebp
-; X32_AVX256-NEXT:    movl %esp, %ebp
-; X32_AVX256-NEXT:    andl $-8, %esp
-; X32_AVX256-NEXT:    subl $8, %esp
-; X32_AVX256-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32_AVX256-NEXT:    vmovlps %xmm0, (%esp)
 ; X32_AVX256-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X32_AVX256-NEXT:    vbroadcastsd %xmm0, %ymm0
 ; X32_AVX256-NEXT:    vmovaps %ymm0, %ymm1
-; X32_AVX256-NEXT:    movl %ebp, %esp
-; X32_AVX256-NEXT:    popl %ebp
 ; X32_AVX256-NEXT:    retl
 ;
 ; X64_AVX256-LABEL: insert_subvector_into_undef: