Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -7650,14 +7650,17 @@
   uint64_t NonZeros = 0;
   bool IsAllConstants = true;
   SmallSet<SDValue, 8> Values;
+  unsigned NumConstants = NumElems;
   for (unsigned i = 0; i < NumElems; ++i) {
     SDValue Elt = Op.getOperand(i);
     if (Elt.isUndef())
       continue;
     Values.insert(Elt);
     if (Elt.getOpcode() != ISD::Constant &&
-        Elt.getOpcode() != ISD::ConstantFP)
+        Elt.getOpcode() != ISD::ConstantFP) {
       IsAllConstants = false;
+      NumConstants--;
+    }
     if (X86::isZeroNode(Elt))
       NumZero++;
     else {
@@ -7671,6 +7674,32 @@
   if (NumNonZero == 0)
     return DAG.getUNDEF(VT);
 
+  // If we are inserting one variable into a vector of non-zero constants, do
+  // not load each constant element as a scalar. Load the constants as a vector
+  // and then insert the variable scalar element. Insertion into a zero vector
+  // is handled as a special case somewhere below here.
+  if (NumConstants == NumElems - 1 && NumNonZero != 1) {
+    // Create an all-constant build vector. The variable element in the old BV
+    // is replaced by undef in the new BV.
+    SmallVector<SDValue, 8> NewBVOps(NumElems);
+    SDValue VarElt;
+    SDValue InsIndex;
+    for (unsigned i = 0; i != NumElems; ++i) {
+      SDValue Elt = Op.getOperand(i);
+      if (isa<ConstantSDNode>(Elt) || isa<ConstantFPSDNode>(Elt) ||
+          Elt.isUndef()) {
+        NewBVOps[i] = Elt;
+      } else {
+        NewBVOps[i] = DAG.getUNDEF(ExtVT);
+        VarElt = Elt;
+        InsIndex = DAG.getConstant(i, dl, getVectorIdxTy(DAG.getDataLayout()));
+      }
+    }
+
+    SDValue NewBV = DAG.getBuildVector(VT, dl, NewBVOps);
+    return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewBV, VarElt, InsIndex);
+  }
+
   // Special case for single non-zero, non-undef, element.
   if (NumNonZero == 1) {
     unsigned Idx = countTrailingZeros(NonZeros);
Index: test/CodeGen/X86/insert-into-constant-vector.ll
===================================================================
--- test/CodeGen/X86/insert-into-constant-vector.ll
+++ test/CodeGen/X86/insert-into-constant-vector.ll
@@ -11,374 +11,70 @@
 define <16 x i8> @elt0_v16i8(i8 %x) {
 ; X32SSE2-LABEL: elt0_v16i8:
 ; X32SSE2: # BB#0:
-; X32SSE2-NEXT: movl $15, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $14, %eax
-; X32SSE2-NEXT: movd %eax, %xmm1
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32SSE2-NEXT: movl $13, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $12, %eax
-; X32SSE2-NEXT: movd %eax, %xmm2
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X32SSE2-NEXT: movl $11, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $10, %eax
-; X32SSE2-NEXT: movd %eax, %xmm3
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32SSE2-NEXT: movl $9, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $8, %eax
-; X32SSE2-NEXT: movd %eax, %xmm1
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X32SSE2-NEXT: movl $7, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $6, %eax
-; X32SSE2-NEXT: movd %eax, %xmm2
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32SSE2-NEXT: movl $5, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $4, %eax
-; X32SSE2-NEXT: movd %eax, %xmm3
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X32SSE2-NEXT: movl $3, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $2, %eax
-; X32SSE2-NEXT: movd %eax, %xmm2
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32SSE2-NEXT: movl $1, %eax
-; X32SSE2-NEXT: movd %eax, %xmm4
-; X32SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32SSE2-NEXT: andnps %xmm1, %xmm0
+; X32SSE2-NEXT: orps {{\.LCPI.*}}, %xmm0
 ; X32SSE2-NEXT: retl
 ;
 ; X64SSE2-LABEL: elt0_v16i8:
 ; X64SSE2: # BB#0:
-; X64SSE2-NEXT: movl $15, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $14, %eax
-; X64SSE2-NEXT: movd %eax, %xmm1
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64SSE2-NEXT: movl $13, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $12, %eax
-; X64SSE2-NEXT: movd %eax, %xmm2
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64SSE2-NEXT: movl $11, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $10, %eax
-; X64SSE2-NEXT: movd %eax, %xmm3
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64SSE2-NEXT: movl $9, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $8, %eax
-; X64SSE2-NEXT: movd %eax, %xmm1
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64SSE2-NEXT: movl $7, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $6, %eax
-; X64SSE2-NEXT: movd %eax, %xmm2
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64SSE2-NEXT: movl $5, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $4, %eax
-; X64SSE2-NEXT: movd %eax, %xmm3
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X64SSE2-NEXT: movl $3, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $2, %eax
-; X64SSE2-NEXT: movd %eax, %xmm2
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64SSE2-NEXT: movl $1, %eax
-; X64SSE2-NEXT: movd %eax, %xmm4
-; X64SSE2-NEXT: movd %edi, %xmm0
-; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64SSE2-NEXT: movd %edi, %xmm1
+; X64SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X64SSE2-NEXT: pandn %xmm1, %xmm0
+; X64SSE2-NEXT: por {{.*}}(%rip), %xmm0
 ; X64SSE2-NEXT: retq
 ;
 ; X32SSE4-LABEL: elt0_v16i8:
 ; X32SSE4: # BB#0:
-; X32SSE4-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE4-NEXT: movl $1, %eax
-; X32SSE4-NEXT: pinsrb $1, %eax, %xmm0
-; X32SSE4-NEXT: movl $2, %eax
-; X32SSE4-NEXT: pinsrb $2, %eax, %xmm0
-; X32SSE4-NEXT: movl $3, %eax
-; X32SSE4-NEXT: pinsrb $3, %eax, %xmm0
-; X32SSE4-NEXT: movl $4, %eax
-; X32SSE4-NEXT: pinsrb $4, %eax, %xmm0
-; X32SSE4-NEXT: movl $5, %eax
-; X32SSE4-NEXT: pinsrb $5, %eax, %xmm0
-; X32SSE4-NEXT: movl $6, %eax
-; X32SSE4-NEXT: pinsrb $6, %eax, %xmm0
-; X32SSE4-NEXT: movl $7, %eax
-; X32SSE4-NEXT: pinsrb $7, %eax, %xmm0
-; X32SSE4-NEXT: movl $8, %eax
-; X32SSE4-NEXT: pinsrb $8, %eax, %xmm0
-; X32SSE4-NEXT: movl $9, %eax
-; X32SSE4-NEXT: pinsrb $9, %eax, %xmm0
-; X32SSE4-NEXT: movl $10, %eax
-; X32SSE4-NEXT: pinsrb $10, %eax, %xmm0
-; X32SSE4-NEXT: movl $11, %eax
-; X32SSE4-NEXT: pinsrb $11, %eax, %xmm0
-; X32SSE4-NEXT: movl $12, %eax
-; X32SSE4-NEXT: pinsrb $12, %eax, %xmm0
-; X32SSE4-NEXT: movl $13, %eax
-; X32SSE4-NEXT: pinsrb $13, %eax, %xmm0
-; X32SSE4-NEXT: movl $14, %eax
-; X32SSE4-NEXT: pinsrb $14, %eax, %xmm0
-; X32SSE4-NEXT: movl $15, %eax
-; X32SSE4-NEXT: pinsrb $15, %eax, %xmm0
+; X32SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
+; X32SSE4-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
 ; X32SSE4-NEXT: retl
 ;
 ; X64SSE4-LABEL: elt0_v16i8:
 ; X64SSE4: # BB#0:
-; X64SSE4-NEXT: movd %edi, %xmm0
-; X64SSE4-NEXT: movl $1, %eax
-; X64SSE4-NEXT: pinsrb $1, %eax, %xmm0
-; X64SSE4-NEXT: movl $2, %eax
-; X64SSE4-NEXT: pinsrb $2, %eax, %xmm0
-; X64SSE4-NEXT: movl $3, %eax
-; X64SSE4-NEXT: pinsrb $3, %eax, %xmm0
-; X64SSE4-NEXT: movl $4, %eax
-; X64SSE4-NEXT: pinsrb $4, %eax, %xmm0
-; X64SSE4-NEXT: movl $5, %eax
-; X64SSE4-NEXT: pinsrb $5, %eax, %xmm0
-; X64SSE4-NEXT: movl $6, %eax
-; X64SSE4-NEXT: pinsrb $6, %eax, %xmm0
-; X64SSE4-NEXT: movl $7, %eax
-; X64SSE4-NEXT: pinsrb $7, %eax, %xmm0
-; X64SSE4-NEXT: movl $8, %eax
-; X64SSE4-NEXT: pinsrb $8, %eax, %xmm0
-; X64SSE4-NEXT: movl $9, %eax
-; X64SSE4-NEXT: pinsrb $9, %eax, %xmm0
-; X64SSE4-NEXT: movl $10, %eax
-; X64SSE4-NEXT: pinsrb $10, %eax, %xmm0
-; X64SSE4-NEXT: movl $11, %eax
-; X64SSE4-NEXT: pinsrb $11, %eax, %xmm0
-; X64SSE4-NEXT: movl $12, %eax
-; X64SSE4-NEXT: pinsrb $12, %eax, %xmm0
-; X64SSE4-NEXT: movl $13, %eax
-; X64SSE4-NEXT: pinsrb $13, %eax, %xmm0
-; X64SSE4-NEXT: movl $14, %eax
-; X64SSE4-NEXT: pinsrb $14, %eax, %xmm0
-; X64SSE4-NEXT: movl $15, %eax
-; X64SSE4-NEXT: pinsrb $15, %eax, %xmm0
+; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
+; X64SSE4-NEXT: pinsrb $0, %edi, %xmm0
 ; X64SSE4-NEXT: retq
 ;
 ; X32AVX-LABEL: elt0_v16i8:
 ; X32AVX: # BB#0:
-; X32AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32AVX-NEXT: movl $1, %eax
-; X32AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $2, %eax
-; X32AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $3, %eax
-; X32AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $4, %eax
-; X32AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $5, %eax
-; X32AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $6, %eax
-; X32AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $7, %eax
-; X32AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $8, %eax
-; X32AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $9, %eax
-; X32AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $10, %eax
-; X32AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $11, %eax
-; X32AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $12, %eax
-; X32AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $13, %eax
-; X32AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $14, %eax
-; X32AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $15, %eax
-; X32AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
+; X32AVX-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X32AVX-NEXT: retl
 ;
 ; X64AVX-LABEL: elt0_v16i8:
 ; X64AVX: # BB#0:
-; X64AVX-NEXT: vmovd %edi, %xmm0
-; X64AVX-NEXT: movl $1, %eax
-; X64AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $2, %eax
-; X64AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $3, %eax
-; X64AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $4, %eax
-; X64AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $5, %eax
-; X64AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $6, %eax
-; X64AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $7, %eax
-; X64AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $8, %eax
-; X64AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $9, %eax
-; X64AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $10, %eax
-; X64AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $11, %eax
-; X64AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $12, %eax
-; X64AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $13, %eax
-; X64AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $14, %eax
-; X64AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $15, %eax
-; X64AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
+; X64AVX-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
 ; X64AVX-NEXT: retq
 %ins = insertelement <16 x i8> <i8 42, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i8 %x, i32 0
 ret <16 x i8> %ins
 }
 
 define <8 x i16> @elt5_v8i16(i16 %x) {
-; X32SSE2-LABEL: elt5_v8i16:
-; X32SSE2: # BB#0:
-; X32SSE2-NEXT: movl $7, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $6, %eax
-; X32SSE2-NEXT: movd %eax, %xmm1
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32SSE2-NEXT: movl $4, %eax
-; X32SSE2-NEXT: movd %eax, %xmm2
-; X32SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32SSE2-NEXT: movl $3, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movl $2, %eax
-; X32SSE2-NEXT: movd %eax, %xmm1
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32SSE2-NEXT: movl $1, %eax
-; X32SSE2-NEXT: movd %eax, %xmm3
-; X32SSE2-NEXT: movl $42, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; X32SSE2-NEXT: retl
-;
-; X64SSE2-LABEL: elt5_v8i16:
-; X64SSE2: # BB#0:
-; X64SSE2-NEXT: movl $7, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $6, %eax
-; X64SSE2-NEXT: movd %eax, %xmm1
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64SSE2-NEXT: movd %edi, %xmm0
-; X64SSE2-NEXT: movl $4, %eax
-; X64SSE2-NEXT: movd %eax, %xmm2
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X64SSE2-NEXT: movl $3, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $2, %eax
-; X64SSE2-NEXT: movd %eax, %xmm1
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64SSE2-NEXT: movl $1, %eax
-; X64SSE2-NEXT: movd %eax, %xmm3
-; X64SSE2-NEXT: movl $42, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; X64SSE2-NEXT: retq
-;
-; X32SSE4-LABEL: elt5_v8i16:
-; X32SSE4: # BB#0:
-; X32SSE4-NEXT: movl $42, %eax
-; X32SSE4-NEXT: movd %eax, %xmm0
-; X32SSE4-NEXT: movl $1, %eax
-; X32SSE4-NEXT: pinsrw $1, %eax, %xmm0
-; X32SSE4-NEXT: movl $2, %eax
-; X32SSE4-NEXT: pinsrw $2, %eax, %xmm0
-; X32SSE4-NEXT: movl $3, %eax
-; X32SSE4-NEXT: pinsrw $3, %eax, %xmm0
-; X32SSE4-NEXT: movl $4, %eax
-; X32SSE4-NEXT: pinsrw $4, %eax, %xmm0
-; X32SSE4-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0
-; X32SSE4-NEXT: movl $6, %eax
-; X32SSE4-NEXT: pinsrw $6, %eax, %xmm0
-; X32SSE4-NEXT: movl $7, %eax
-; X32SSE4-NEXT: pinsrw $7, %eax, %xmm0
-; X32SSE4-NEXT: retl
+; X32SSE-LABEL: elt5_v8i16:
+; X32SSE: # BB#0:
+; X32SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
+; X32SSE-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0
+; X32SSE-NEXT: retl
 ;
-; X64SSE4-LABEL: elt5_v8i16:
-; X64SSE4: # BB#0:
-; X64SSE4-NEXT: movl $42, %eax
-; X64SSE4-NEXT: movd %eax, %xmm0
-; X64SSE4-NEXT: movl $1, %eax
-; X64SSE4-NEXT: pinsrw $1, %eax, %xmm0
-; X64SSE4-NEXT: movl $2, %eax
-; X64SSE4-NEXT: pinsrw $2, %eax, %xmm0
-; X64SSE4-NEXT: movl $3, %eax
-; X64SSE4-NEXT: pinsrw $3, %eax, %xmm0
-; X64SSE4-NEXT: movl $4, %eax
-; X64SSE4-NEXT: pinsrw $4, %eax, %xmm0
-; X64SSE4-NEXT: pinsrw $5, %edi, %xmm0
-; X64SSE4-NEXT: movl $6, %eax
-; X64SSE4-NEXT: pinsrw $6, %eax, %xmm0
-; X64SSE4-NEXT: movl $7, %eax
-; X64SSE4-NEXT: pinsrw $7, %eax, %xmm0
-; X64SSE4-NEXT: retq
+; X64SSE-LABEL: elt5_v8i16:
+; X64SSE: # BB#0:
+; X64SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
+; X64SSE-NEXT: pinsrw $5, %edi, %xmm0
+; X64SSE-NEXT: retq
 ;
 ; X32AVX-LABEL: elt5_v8i16:
 ; X32AVX: # BB#0:
-; X32AVX-NEXT: movl $42, %eax
-; X32AVX-NEXT: vmovd %eax, %xmm0
-; X32AVX-NEXT: movl $1, %eax
-; X32AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $2, %eax
-; X32AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $3, %eax
-; X32AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $4, %eax
-; X32AVX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
 ; X32AVX-NEXT: vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X32AVX-NEXT: movl $6, %eax
-; X32AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $7, %eax
-; X32AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
 ; X32AVX-NEXT: retl
 ;
 ; X64AVX-LABEL: elt5_v8i16:
 ; X64AVX: # BB#0:
-; X64AVX-NEXT: movl $42, %eax
-; X64AVX-NEXT: vmovd %eax, %xmm0
-; X64AVX-NEXT: movl $1, %eax
-; X64AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $2, %eax
-; X64AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $3, %eax
-; X64AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $4, %eax
-; X64AVX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7>
 ; X64AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
-; X64AVX-NEXT: movl $6, %eax
-; X64AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $7, %eax
-; X64AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
 ; X64AVX-NEXT: retq
 %ins = insertelement <8 x i16> <i16 42, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 %x, i32 5
 ret <8 x i16> %ins
@@ -387,73 +83,41 @@
 define <4 x i32> @elt3_v4i32(i32 %x) {
 ; X32SSE2-LABEL: elt3_v4i32:
 ; X32SSE2: # BB#0:
-; X32SSE2-NEXT: movl $2, %eax
-; X32SSE2-NEXT: movd %eax, %xmm1
-; X32SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32SSE2-NEXT: movl $1, %eax
-; X32SSE2-NEXT: movd %eax, %xmm2
-; X32SSE2-NEXT: movl $42, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u>
+; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
 ; X32SSE2-NEXT: retl
 ;
 ; X64SSE2-LABEL: elt3_v4i32:
 ; X64SSE2: # BB#0:
-; X64SSE2-NEXT: movd %edi, %xmm0
-; X64SSE2-NEXT: movl $2, %eax
-; X64SSE2-NEXT: movd %eax, %xmm1
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64SSE2-NEXT: movl $1, %eax
-; X64SSE2-NEXT: movd %eax, %xmm2
-; X64SSE2-NEXT: movl $42, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64SSE2-NEXT: movd %edi, %xmm1
+; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u>
+; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
 ; X64SSE2-NEXT: retq
 ;
 ; X32SSE4-LABEL: elt3_v4i32:
 ; X32SSE4: # BB#0:
-; X32SSE4-NEXT: movl $42, %eax
-; X32SSE4-NEXT: movd %eax, %xmm0
-; X32SSE4-NEXT: movl $1, %eax
-; X32SSE4-NEXT: pinsrd $1, %eax, %xmm0
-; X32SSE4-NEXT: movl $2, %eax
-; X32SSE4-NEXT: pinsrd $2, %eax, %xmm0
+; X32SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u>
 ; X32SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
 ; X32SSE4-NEXT: retl
 ;
 ; X64SSE4-LABEL: elt3_v4i32:
 ; X64SSE4: # BB#0:
-; X64SSE4-NEXT: movl $42, %eax
-; X64SSE4-NEXT: movd %eax, %xmm0
-; X64SSE4-NEXT: movl $1, %eax
-; X64SSE4-NEXT: pinsrd $1, %eax, %xmm0
-; X64SSE4-NEXT: movl $2, %eax
-; X64SSE4-NEXT: pinsrd $2, %eax, %xmm0
+; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u>
 ; X64SSE4-NEXT: pinsrd $3, %edi, %xmm0
 ; X64SSE4-NEXT: retq
 ;
 ; X32AVX-LABEL: elt3_v4i32:
 ; X32AVX: # BB#0:
-; X32AVX-NEXT: movl $42, %eax
-; X32AVX-NEXT: vmovd %eax, %xmm0
-; X32AVX-NEXT: movl $1, %eax
-; X32AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32AVX-NEXT: movl $2, %eax
-; X32AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
 ; X32AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X32AVX-NEXT: retl
 ;
 ; X64AVX-LABEL: elt3_v4i32:
 ; X64AVX: # BB#0:
-; X64AVX-NEXT: movl $42, %eax
-; X64AVX-NEXT: vmovd %eax, %xmm0
-; X64AVX-NEXT: movl $1, %eax
-; X64AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X64AVX-NEXT: movl $2, %eax
-; X64AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u>
 ; X64AVX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
 ; X64AVX-NEXT: retq
 %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
@@ -469,13 +133,18 @@
 ; X32SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32SSE-NEXT: retl
 ;
-; X64SSE-LABEL: elt0_v2i64:
-; X64SSE: # BB#0:
-; X64SSE-NEXT: movq %rdi, %xmm0
-; X64SSE-NEXT: movl $1, %eax
-; X64SSE-NEXT: movq %rax, %xmm1
-; X64SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X64SSE-NEXT: retq
+; X64SSE2-LABEL: elt0_v2i64:
+; X64SSE2: # BB#0:
+; X64SSE2-NEXT: movq %rdi, %xmm1
+; X64SSE2-NEXT: movapd {{.*#+}} xmm0 = <u,1>
+; X64SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X64SSE2-NEXT: retq
+;
+; X64SSE4-LABEL: elt0_v2i64:
+; X64SSE4: # BB#0:
+; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = <u,1>
+; X64SSE4-NEXT: pinsrq $0, %rdi, %xmm0
+; X64SSE4-NEXT: retq
 ;
 ; X32AVX-LABEL: elt0_v2i64:
 ; X32AVX: # BB#0:
@@ -487,10 +156,8 @@
 ;
 ; X64AVX-LABEL: elt0_v2i64:
 ; X64AVX: # BB#0:
-; X64AVX-NEXT: vmovq %rdi, %xmm0
-; X64AVX-NEXT: movl $1, %eax
-; X64AVX-NEXT: vmovq %rax, %xmm1
-; X64AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,1>
+; X64AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
 ; X64AVX-NEXT: retq
 %ins = insertelement <2 x i64> <i64 42, i64 1>, i64 %x, i32 0
 ret <2 x i64> %ins
@@ -503,9 +170,10 @@
 ; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,0]
+; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
 ; X32SSE2-NEXT: retl
 ;
 ; X64SSE2-LABEL: elt1_v4f32:
@@ -514,42 +182,42 @@
 ; X64SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X64SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X64SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X64SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X64SSE2-NEXT: movaps %xmm1, %xmm0
+; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
 ; X64SSE2-NEXT: retq
 ;
 ; X32SSE4-LABEL: elt1_v4f32:
 ; X32SSE4: # BB#0:
 ; X32SSE4-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
 ; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
 ; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
 ; X32SSE4-NEXT: retl
 ;
 ; X64SSE4-LABEL: elt1_v4f32:
 ; X64SSE4: # BB#0:
 ; X64SSE4-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
 ; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
 ; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
 ; X64SSE4-NEXT: movaps %xmm1, %xmm0
 ; X64SSE4-NEXT: retq
 ;
 ; X32AVX-LABEL: elt1_v4f32:
 ; X32AVX: # BB#0:
 ; X32AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
 ; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
 ; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
 ; X32AVX-NEXT: retl
 ;
 ; X64AVX-LABEL: elt1_v4f32:
 ; X64AVX: # BB#0:
 ; X64AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
 ; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
 ; X64AVX-NEXT: retq
 %ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
 ret <4 x float> %ins
@@ -587,82 +255,50 @@
 define <8 x i32> @elt7_v8i32(i32 %x) {
 ; X32SSE2-LABEL: elt7_v8i32:
 ; X32SSE2: # BB#0:
-; X32SSE2-NEXT: movl $6, %eax
-; X32SSE2-NEXT: movd %eax, %xmm0
-; X32SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32SSE2-NEXT: movl $5, %eax
-; X32SSE2-NEXT: movd %eax, %xmm2
-; X32SSE2-NEXT: movl $4, %eax
-; X32SSE2-NEXT: movd %eax, %xmm1
-; X32SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u>
+; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
 ; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
 ; X32SSE2-NEXT: retl
 ;
 ; X64SSE2-LABEL: elt7_v8i32:
 ; X64SSE2: # BB#0:
 ; X64SSE2-NEXT: movd %edi, %xmm0
-; X64SSE2-NEXT: movl $6, %eax
-; X64SSE2-NEXT: movd %eax, %xmm2
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64SSE2-NEXT: movl $5, %eax
-; X64SSE2-NEXT: movd %eax, %xmm0
-; X64SSE2-NEXT: movl $4, %eax
-; X64SSE2-NEXT: movd %eax, %xmm1
-; X64SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u>
+; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
 ; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
 ; X64SSE2-NEXT: retq
 ;
 ; X32SSE4-LABEL: elt7_v8i32:
 ; X32SSE4: # BB#0:
-; X32SSE4-NEXT: movl $4, %eax
-; X32SSE4-NEXT: movd %eax, %xmm1
-; X32SSE4-NEXT: movl $5, %eax
-; X32SSE4-NEXT: pinsrd $1, %eax, %xmm1
-; X32SSE4-NEXT: movl $6, %eax
-; X32SSE4-NEXT: pinsrd $2, %eax, %xmm1
+; X32SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u>
 ; X32SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm1
 ; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
 ; X32SSE4-NEXT: retl
 ;
 ; X64SSE4-LABEL: elt7_v8i32:
 ; X64SSE4: # BB#0:
-; X64SSE4-NEXT: movl $4, %eax
-; X64SSE4-NEXT: movd %eax, %xmm1
-; X64SSE4-NEXT: movl $5, %eax
-; X64SSE4-NEXT: pinsrd $1, %eax, %xmm1
-; X64SSE4-NEXT: movl $6, %eax
-; X64SSE4-NEXT: pinsrd $2, %eax, %xmm1
+; X64SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u>
 ; X64SSE4-NEXT: pinsrd $3, %edi, %xmm1
 ; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
 ; X64SSE4-NEXT: retq
 ;
 ; X32AVX-LABEL: elt7_v8i32:
 ; X32AVX: # BB#0:
-; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [42,1,2,3]
-; X32AVX-NEXT: movl $4, %eax
-; X32AVX-NEXT: vmovd %eax, %xmm1
-; X32AVX-NEXT: movl $5, %eax
-; X32AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32AVX-NEXT: movl $6, %eax
-; X32AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X32AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; X32AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <4,5,6,u>
+; X32AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X32AVX-NEXT: vmovdqa {{.*#+}} ymm1 = <42,1,2,3,4,5,6,u>
+; X32AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; X32AVX-NEXT: retl
 ;
 ; X64AVX-LABEL: elt7_v8i32:
 ; X64AVX: # BB#0:
-; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [42,1,2,3]
-; X64AVX-NEXT: movl $4, %eax
-; X64AVX-NEXT: vmovd %eax, %xmm1
-; X64AVX-NEXT: movl $5, %eax
-; X64AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X64AVX-NEXT: movl $6, %eax
-; X64AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X64AVX-NEXT: vpinsrd $3, %edi, %xmm1, %xmm1
-; X64AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <4,5,6,u>
+; X64AVX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
+; X64AVX-NEXT: vmovdqa {{.*#+}} ymm1 = <42,1,2,3,4,5,6,u>
+; X64AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; X64AVX-NEXT: retq
 %ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7
 ret <8 x i32> %ins
@@ -672,23 +308,25 @@
 ; X32SSE2-LABEL: elt6_v8f32:
 ; X32SSE2: # BB#0:
 ; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
+; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
 ; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
 ; X32SSE2-NEXT: retl
 ;
 ; X64SSE2-LABEL: elt6_v8f32:
 ; X64SSE2: # BB#0:
-; X64SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X64SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X64SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
+; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
 ; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
 ; X64SSE2-NEXT: retq
 ;
@@ -696,8 +334,8 @@
 ; X32SSE4: # BB#0:
 ; X32SSE4-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
-; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
 ; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
 ; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
 ; X32SSE4-NEXT: retl
 ;
@@ -705,8 +343,8 @@
 ; X64SSE4: # BB#0:
 ; X64SSE4-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
-; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
 ; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
 ; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
 ; X64SSE4-NEXT: retq
 ;
@@ -718,8 +356,8 @@
 ; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
 ; X32AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
-; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
 ; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
 ; X32AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X32AVX-NEXT: retl
 ;
@@ -727,12 +365,12 @@
 ; X64AVX: # BB#0:
 ; X64AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; X64AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
 ; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
 ; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
-; X64AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
-; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3]
-; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
 ; X64AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64AVX-NEXT: retq
 %ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
@@ -751,16 +389,26 @@
 ; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6,0,7,0]
 ; X32SSE-NEXT: retl
 ;
-; X64SSE-LABEL: elt5_v8i64:
-; X64SSE: # BB#0:
-; X64SSE-NEXT: movq %rdi, %xmm0
-; X64SSE-NEXT: movl $4, %eax
-; X64SSE-NEXT: movq %rax, %xmm2
-; X64SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; X64SSE-NEXT: movaps {{.*#+}} xmm0 = [42,1]
-; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
-; X64SSE-NEXT: movaps {{.*#+}} xmm3 = [6,7]
-; X64SSE-NEXT: retq
+; X64SSE2-LABEL: elt5_v8i64:
+; X64SSE2: # BB#0:
+; X64SSE2-NEXT: movq %rdi, %xmm0
+; X64SSE2-NEXT: movl $4, %eax
+; X64SSE2-NEXT: movq %rax, %xmm2
+; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1]
+; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
+; X64SSE2-NEXT: movaps {{.*#+}} xmm3 = [6,7]
+; X64SSE2-NEXT: retq
+;
+; X64SSE4-LABEL: elt5_v8i64:
+; X64SSE4: # BB#0:
+; X64SSE4-NEXT: movl $4, %eax
+; X64SSE4-NEXT: movq %rax, %xmm2
+; X64SSE4-NEXT: pinsrq $1, %rdi, %xmm2
+; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1]
+; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = [2,3]
+; X64SSE4-NEXT: movaps {{.*#+}} xmm3 = [6,7]
+; X64SSE4-NEXT: retq
 ;
 ; X32AVX2-LABEL: elt5_v8i64:
 ; X32AVX2: # BB#0:
@@ -774,11 +422,10 @@
 ;
 ; X64AVX2-LABEL: elt5_v8i64:
 ; X64AVX2: # BB#0:
-; X64AVX2-NEXT: vmovq %rdi, %xmm0
 ; X64AVX2-NEXT: movl $4, %eax
-; X64AVX2-NEXT: vmovq %rax, %xmm1
-; X64AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64AVX2-NEXT: vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm1
+; X64AVX2-NEXT: vmovq %rax, %xmm0
+; X64AVX2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
+; X64AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],mem[4,5,6,7]
 ; X64AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3]
 ; X64AVX2-NEXT: retq
 ;
@@ -795,13 +442,11 @@
 ;
 ; X64AVX512F-LABEL: elt5_v8i64:
 ; X64AVX512F: # BB#0:
-; X64AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [42,1,2,3]
-; X64AVX512F-NEXT: vmovq %rdi, %xmm1
 ; X64AVX512F-NEXT: movl $4, %eax
-; X64AVX512F-NEXT: vmovq %rax, %xmm2
-; X64AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; X64AVX512F-NEXT: vinserti128 $1, {{.*}}(%rip), %ymm1, %ymm1
-; X64AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X64AVX512F-NEXT: vmovq %rax, %xmm0
+; X64AVX512F-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
+; X64AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <42,1,2,3,4,u,6,7>
+; X64AVX512F-NEXT: vinserti32x4 $2, %xmm0, %zmm1, %zmm0
 ; X64AVX512F-NEXT: retq
 %ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
 ret <8 x i64> %ins
@@ -855,11 +500,12 @@
 ; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
 ; X32AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X32AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
 ; X32AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
 ; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
-; X32AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; X32AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; X32AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
+; X32AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; X32AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
 ; X32AVX512F-NEXT: retl
 ;
 ; X64AVX512F-LABEL: elt1_v8f64:
@@ -870,11 +516,12 @@
 ; X64AVX512F-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
 ; X64AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; X64AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; X64AVX512F-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; X64AVX512F-NEXT: vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0]
+; X64AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm3
+; X64AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm3, %zmm1
 ; X64AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; X64AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; X64AVX512F-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
-; X64AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X64AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X64AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
 ; X64AVX512F-NEXT: retq
 %ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1
 ret <8 x double> %ins
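
Reviewer note (not part of the patch; patch tools ignore trailing text after
the last hunk): the new LowerBUILD_VECTOR path fires whenever all but one
element of a build vector is a non-zero constant, per the comment in the C++
hunk above. A minimal standalone reproducer under those assumptions, with a
made-up function name and constant values chosen only for illustration:

  ; Run with: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < repro.ll
  define <4 x i32> @insert_one_var(i32 %x) {
    ; Lanes 0, 2, and 3 are non-zero constants and only lane 1 is variable,
    ; so with this patch the constants are loaded from the constant pool as a
    ; single vector and %x is inserted with one pinsrd, instead of
    ; materializing each constant in a GPR and assembling the vector.
    %ins = insertelement <4 x i32> <i32 10, i32 20, i32 30, i32 40>, i32 %x, i32 1
    ret <4 x i32> %ins
  }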