Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13802,10 +13802,17 @@ } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + // If the input vector is another INSERT_SUBVECTOR, and this insert replaces + // the last insertion, then insert into the common source vector. + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(2) == N2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), + N1, N2); + if (N0.getValueType() != N1.getValueType()) return SDValue(); @@ -13814,7 +13821,6 @@ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); - EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) Index: test/CodeGen/X86/avx-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -1026,9 +1026,8 @@ ; X32: # BB#0: ; X32-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 -; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm2 +; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_insert_epi64: Index: test/CodeGen/X86/insertelement-zero.ll =================================================================== --- test/CodeGen/X86/insertelement-zero.ll +++ test/CodeGen/X86/insertelement-zero.ll @@ -587,7 +587,6 @@ ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -601,7 +600,6 @@ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq