Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14947,6 +14947,29 @@ if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); + // If this is a splat of a bitcast from another vector, change to a + // concat_vector. + // For example: + // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) -> + // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X)))) + // + // If X is a build_vector itself, the concat can become a larger build_vector. + // TODO: Maybe this is useful for non-splat too? + if (!LegalOperations) { + if (SDValue Splat = cast(N)->getSplatValue()) { + Splat = peekThroughBitcast(Splat); + EVT SrcVT = Splat.getValueType(); + if (SrcVT.isVector()) { + unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getVectorElementType(), NumElts); + SmallVector Ops(N->getNumOperands(), Splat); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops); + return DAG.getBitcast(VT, Concat); + } + } + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); Index: test/CodeGen/X86/insertelement-shuffle.ll =================================================================== --- test/CodeGen/X86/insertelement-shuffle.ll +++ test/CodeGen/X86/insertelement-shuffle.ll @@ -97,17 +97,9 @@ define <8 x i64> @insert_subvector_into_undef(i32 %x0, i32 %x1) nounwind { ; X32_AVX256-LABEL: insert_subvector_into_undef: ; X32_AVX256: # %bb.0: -; X32_AVX256-NEXT: pushl %ebp -; X32_AVX256-NEXT: movl %esp, %ebp -; X32_AVX256-NEXT: andl $-8, %esp -; X32_AVX256-NEXT: subl $8, %esp -; X32_AVX256-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32_AVX256-NEXT: vmovlps %xmm0, (%esp) ; X32_AVX256-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X32_AVX256-NEXT: vbroadcastsd %xmm0, %ymm0 ; X32_AVX256-NEXT: vmovaps %ymm0, %ymm1 -; X32_AVX256-NEXT: movl %ebp, %esp -; X32_AVX256-NEXT: popl %ebp ; X32_AVX256-NEXT: retl ; ; X64_AVX256-LABEL: insert_subvector_into_undef: