Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13802,10 +13802,20 @@ } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + // Combine INSERT_SUBVECTORs where we are inserting to the same index. + // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) + // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && + N0.getOperand(1).getValueType() == N1.getValueType() && + N0.getOperand(2) == N2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), + N1, N2); + if (N0.getValueType() != N1.getValueType()) return SDValue(); @@ -13814,7 +13824,6 @@ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); - EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -1026,9 +1026,8 @@ ; X32: # BB#0: ; X32-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 -; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm2 +; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_insert_epi64: Index: llvm/trunk/test/CodeGen/X86/insertelement-zero.ll =================================================================== 
--- llvm/trunk/test/CodeGen/X86/insertelement-zero.ll +++ llvm/trunk/test/CodeGen/X86/insertelement-zero.ll @@ -587,7 +587,6 @@ ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -601,7 +600,6 @@ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq