Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -7675,14 +7675,16 @@
   uint64_t NonZeros = 0;
   bool IsAllConstants = true;
   SmallSet<SDValue, 8> Values;
+  unsigned NumConstants = NumElems;
   for (unsigned i = 0; i < NumElems; ++i) {
     SDValue Elt = Op.getOperand(i);
     if (Elt.isUndef())
       continue;
     Values.insert(Elt);
-    if (Elt.getOpcode() != ISD::Constant &&
-        Elt.getOpcode() != ISD::ConstantFP)
+    if (!isa<ConstantSDNode>(Elt) && !isa<ConstantFPSDNode>(Elt)) {
       IsAllConstants = false;
+      NumConstants--;
+    }
     if (X86::isZeroNode(Elt))
       NumZero++;
     else {
@@ -7696,6 +7698,52 @@
   if (NumNonZero == 0)
     return DAG.getUNDEF(VT);
 
+  // If we are inserting one variable into a vector of non-zero constants, try
+  // to avoid loading each constant element as a scalar. Load the constants as a
+  // vector and then insert the variable scalar element. If insertion is not
+  // supported, we assume that we will fall back to a shuffle to get the scalar
+  // blended with the constants. Insertion into a zero vector is handled as a
+  // special-case somewhere below here.
+  LLVMContext &Context = *DAG.getContext();
+  if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
+      (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
+       isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
+    // Create an all-constant vector. The variable element in the old
+    // build vector is replaced by undef in the constant vector. Save the
+    // variable scalar element and its index for use in the insertelement.
+    Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context);
+    SmallVector<Constant *, 16> ConstVecOps(NumElems, UndefValue::get(EltType));
+    SDValue VarElt;
+    SDValue InsIndex;
+    for (unsigned i = 0; i != NumElems; ++i) {
+      SDValue Elt = Op.getOperand(i);
+      if (auto *C = dyn_cast<ConstantSDNode>(Elt))
+        ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue());
+      else if (auto *C = dyn_cast<ConstantFPSDNode>(Elt))
+        ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF());
+      else if (!Elt.isUndef()) {
+        assert(!VarElt.getNode() && !InsIndex.getNode() &&
+               "Expected one variable element in this vector");
+        VarElt = Elt;
+        InsIndex = DAG.getConstant(i, dl, getVectorIdxTy(DAG.getDataLayout()));
+      }
+    }
+    Constant *CV = ConstantVector::get(ConstVecOps);
+    SDValue DAGConstVec = DAG.getConstantPool(CV, VT);
+
+    // The constants we just created may not be legal (eg, floating point). We
+    // must lower the vector right here because we can not guarantee that we'll
+    // legalize it before loading it. This is also why we could not just create
+    // a new build vector here. If the build vector contains illegal constants,
+    // it could get split back up into a series of insert elements.
+    // TODO: Improve this by using shorter loads with broadcast/VZEXT_LOAD.
+    SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
+    SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
+    return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+  }
+
   // Special case for single non-zero, non-undef, element.
   if (NumNonZero == 1) {
     unsigned Idx = countTrailingZeros(NonZeros);
@@ -7821,7 +7869,7 @@
 
   // For AVX-length vectors, build the individual 128-bit pieces and use
   // shuffles to put them in place.
if (VT.is256BitVector() || VT.is512BitVector()) { - EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); + EVT HVT = EVT::getVectorVT(Context, ExtVT, NumElems/2); // Build both the lower and upper subvector. SDValue Lower = Index: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll +++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll @@ -60,11 +60,20 @@ ret <4 x float> %5 } +; FIXME: This could be 'movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]'. + define <2 x double> @test_negative_zero_2(<2 x double> %A) { -; CHECK-LABEL: test_negative_zero_2: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: retq +; SSE2-LABEL: test_negative_zero_2: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movapd {{.*#+}} xmm1 = +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_negative_zero_2: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] +; SSE41-NEXT: retq entry: %0 = extractelement <2 x double> %A, i32 0 %1 = insertelement <2 x double> undef, double %0, i32 0 Index: llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll +++ llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll @@ -11,374 +11,70 @@ define <16 x i8> @elt0_v16i8(i8 %x) { ; X32SSE2-LABEL: elt0_v16i8: ; X32SSE2: # BB#0: -; X32SSE2-NEXT: movl $15, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $14, %eax -; X32SSE2-NEXT: movd %eax, %xmm1 -; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X32SSE2-NEXT: movl $13, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $12, %eax -; X32SSE2-NEXT: movd %eax, %xmm2 -; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X32SSE2-NEXT: movl $11, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $10, %eax -; X32SSE2-NEXT: movd %eax, %xmm3 -; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X32SSE2-NEXT: movl $9, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $8, %eax -; X32SSE2-NEXT: movd %eax, %xmm1 -; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X32SSE2-NEXT: movl $7, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $6, %eax -; X32SSE2-NEXT: movd %eax, %xmm2 -; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X32SSE2-NEXT: movl $5, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $4, %eax -; X32SSE2-NEXT: movd %eax, %xmm3 -; X32SSE2-NEXT: punpcklbw 
{{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; X32SSE2-NEXT: movl $3, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $2, %eax -; X32SSE2-NEXT: movd %eax, %xmm2 -; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X32SSE2-NEXT: movl $1, %eax -; X32SSE2-NEXT: movd %eax, %xmm4 -; X32SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; X32SSE2-NEXT: andnps %xmm1, %xmm0 +; X32SSE2-NEXT: orps {{\.LCPI.*}}, %xmm0 ; X32SSE2-NEXT: retl ; ; X64SSE2-LABEL: elt0_v16i8: ; X64SSE2: # BB#0: -; X64SSE2-NEXT: movl $15, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $14, %eax -; X64SSE2-NEXT: movd %eax, %xmm1 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64SSE2-NEXT: movl $13, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $12, %eax -; X64SSE2-NEXT: movd %eax, %xmm2 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X64SSE2-NEXT: movl $11, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $10, %eax -; X64SSE2-NEXT: movd %eax, %xmm3 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64SSE2-NEXT: movl $9, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $8, %eax -; X64SSE2-NEXT: movd %eax, %xmm1 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64SSE2-NEXT: movl $7, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $6, %eax -; X64SSE2-NEXT: movd %eax, %xmm2 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64SSE2-NEXT: movl $5, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $4, %eax -; X64SSE2-NEXT: movd %eax, %xmm3 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; X64SSE2-NEXT: movl $3, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $2, %eax -; X64SSE2-NEXT: movd %eax, %xmm2 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64SSE2-NEXT: movl $1, %eax -; X64SSE2-NEXT: movd %eax, %xmm4 -; X64SSE2-NEXT: movd %edi, %xmm0 -; X64SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64SSE2-NEXT: movd %edi, %xmm1 +; X64SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; X64SSE2-NEXT: pandn %xmm1, %xmm0 +; X64SSE2-NEXT: por {{.*}}(%rip), %xmm0 ; X64SSE2-NEXT: retq ; ; X32SSE4-LABEL: elt0_v16i8: ; X32SSE4: # BB#0: -; X32SSE4-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE4-NEXT: movl $1, %eax -; X32SSE4-NEXT: pinsrb $1, %eax, %xmm0 -; X32SSE4-NEXT: movl $2, %eax -; X32SSE4-NEXT: pinsrb $2, %eax, %xmm0 -; X32SSE4-NEXT: movl $3, %eax -; X32SSE4-NEXT: pinsrb $3, %eax, %xmm0 -; X32SSE4-NEXT: movl $4, %eax -; X32SSE4-NEXT: pinsrb $4, %eax, %xmm0 -; X32SSE4-NEXT: movl $5, %eax -; X32SSE4-NEXT: pinsrb $5, %eax, %xmm0 -; X32SSE4-NEXT: movl $6, %eax -; X32SSE4-NEXT: pinsrb $6, %eax, %xmm0 -; X32SSE4-NEXT: movl $7, %eax -; X32SSE4-NEXT: pinsrb $7, %eax, %xmm0 -; X32SSE4-NEXT: movl $8, %eax -; X32SSE4-NEXT: pinsrb $8, %eax, %xmm0 -; X32SSE4-NEXT: movl $9, %eax -; X32SSE4-NEXT: pinsrb $9, %eax, %xmm0 -; X32SSE4-NEXT: movl $10, %eax -; X32SSE4-NEXT: pinsrb $10, %eax, %xmm0 -; X32SSE4-NEXT: movl $11, %eax -; X32SSE4-NEXT: pinsrb $11, %eax, %xmm0 -; X32SSE4-NEXT: movl $12, %eax -; X32SSE4-NEXT: pinsrb $12, %eax, %xmm0 -; X32SSE4-NEXT: movl $13, %eax -; X32SSE4-NEXT: pinsrb $13, %eax, %xmm0 -; X32SSE4-NEXT: movl $14, %eax -; X32SSE4-NEXT: pinsrb $14, %eax, %xmm0 -; X32SSE4-NEXT: movl $15, %eax -; X32SSE4-NEXT: pinsrb $15, %eax, %xmm0 +; X32SSE4-NEXT: movdqa {{.*#+}} xmm0 = +; X32SSE4-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 ; X32SSE4-NEXT: retl ; ; X64SSE4-LABEL: elt0_v16i8: ; X64SSE4: # BB#0: -; X64SSE4-NEXT: movd %edi, %xmm0 -; X64SSE4-NEXT: movl $1, %eax -; X64SSE4-NEXT: pinsrb $1, %eax, %xmm0 -; X64SSE4-NEXT: movl $2, %eax -; X64SSE4-NEXT: pinsrb $2, %eax, %xmm0 -; X64SSE4-NEXT: movl $3, %eax -; X64SSE4-NEXT: pinsrb $3, %eax, %xmm0 -; X64SSE4-NEXT: movl $4, %eax -; X64SSE4-NEXT: pinsrb $4, %eax, %xmm0 -; X64SSE4-NEXT: movl $5, %eax -; X64SSE4-NEXT: pinsrb $5, %eax, %xmm0 -; X64SSE4-NEXT: movl $6, %eax -; X64SSE4-NEXT: pinsrb $6, %eax, %xmm0 -; X64SSE4-NEXT: movl $7, %eax -; X64SSE4-NEXT: pinsrb $7, %eax, %xmm0 -; X64SSE4-NEXT: movl $8, %eax -; X64SSE4-NEXT: pinsrb $8, %eax, %xmm0 -; X64SSE4-NEXT: movl $9, %eax -; X64SSE4-NEXT: pinsrb $9, %eax, %xmm0 -; X64SSE4-NEXT: movl $10, %eax -; X64SSE4-NEXT: pinsrb $10, %eax, %xmm0 -; X64SSE4-NEXT: movl $11, %eax -; X64SSE4-NEXT: pinsrb $11, %eax, %xmm0 -; X64SSE4-NEXT: movl $12, %eax -; X64SSE4-NEXT: pinsrb $12, %eax, %xmm0 -; X64SSE4-NEXT: movl $13, %eax -; X64SSE4-NEXT: pinsrb $13, %eax, %xmm0 -; X64SSE4-NEXT: movl $14, %eax -; X64SSE4-NEXT: pinsrb $14, %eax, %xmm0 -; X64SSE4-NEXT: movl $15, %eax -; X64SSE4-NEXT: 
pinsrb $15, %eax, %xmm0 +; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = +; X64SSE4-NEXT: pinsrb $0, %edi, %xmm0 ; X64SSE4-NEXT: retq ; ; X32AVX-LABEL: elt0_v16i8: ; X32AVX: # BB#0: -; X32AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32AVX-NEXT: movl $1, %eax -; X32AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $2, %eax -; X32AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $3, %eax -; X32AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $4, %eax -; X32AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $5, %eax -; X32AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $6, %eax -; X32AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $7, %eax -; X32AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $8, %eax -; X32AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $9, %eax -; X32AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $10, %eax -; X32AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $11, %eax -; X32AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $12, %eax -; X32AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $13, %eax -; X32AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $14, %eax -; X32AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $15, %eax -; X32AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = +; X32AVX-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt0_v16i8: ; X64AVX: # BB#0: -; X64AVX-NEXT: vmovd %edi, %xmm0 -; X64AVX-NEXT: movl $1, %eax -; X64AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $2, %eax -; X64AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $3, %eax -; X64AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $4, %eax -; X64AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $5, %eax -; X64AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $6, %eax -; X64AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $7, %eax -; X64AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $8, %eax -; X64AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $9, %eax -; X64AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $10, %eax -; X64AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $11, %eax -; X64AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $12, %eax -; X64AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $13, %eax -; X64AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $14, %eax -; X64AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $15, %eax -; X64AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = +; X64AVX-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0 ; X64AVX-NEXT: retq %ins = insertelement <16 x i8> , i8 %x, i32 0 ret <16 x i8> %ins } define <8 x i16> @elt5_v8i16(i16 %x) { -; X32SSE2-LABEL: elt5_v8i16: -; X32SSE2: # BB#0: -; X32SSE2-NEXT: movl $7, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $6, %eax -; X32SSE2-NEXT: movd %eax, %xmm1 -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X32SSE2-NEXT: movl $4, %eax -; X32SSE2-NEXT: movd %eax, %xmm2 -; X32SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X32SSE2-NEXT: movl $3, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movl $2, %eax -; X32SSE2-NEXT: movd %eax, %xmm1 -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X32SSE2-NEXT: movl $1, %eax -; X32SSE2-NEXT: movd %eax, %xmm3 -; X32SSE2-NEXT: movl $42, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; X32SSE2-NEXT: retl -; -; X64SSE2-LABEL: elt5_v8i16: -; X64SSE2: # BB#0: -; X64SSE2-NEXT: movl $7, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $6, %eax -; X64SSE2-NEXT: movd %eax, %xmm1 -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64SSE2-NEXT: movd %edi, %xmm0 -; X64SSE2-NEXT: movl $4, %eax -; X64SSE2-NEXT: movd %eax, %xmm2 -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X64SSE2-NEXT: movl $3, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $2, %eax -; X64SSE2-NEXT: movd %eax, %xmm1 -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64SSE2-NEXT: movl $1, %eax -; X64SSE2-NEXT: movd %eax, %xmm3 -; X64SSE2-NEXT: movl $42, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; X64SSE2-NEXT: retq -; -; X32SSE4-LABEL: elt5_v8i16: -; X32SSE4: # BB#0: -; X32SSE4-NEXT: movl $42, %eax -; X32SSE4-NEXT: movd %eax, %xmm0 -; X32SSE4-NEXT: movl $1, %eax -; X32SSE4-NEXT: pinsrw $1, %eax, %xmm0 -; X32SSE4-NEXT: movl $2, %eax -; X32SSE4-NEXT: pinsrw $2, %eax, %xmm0 -; X32SSE4-NEXT: movl $3, %eax -; X32SSE4-NEXT: pinsrw $3, %eax, %xmm0 -; X32SSE4-NEXT: movl $4, %eax -; X32SSE4-NEXT: pinsrw $4, %eax, %xmm0 -; X32SSE4-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0 -; X32SSE4-NEXT: movl $6, %eax -; X32SSE4-NEXT: pinsrw $6, %eax, %xmm0 -; X32SSE4-NEXT: movl $7, %eax -; X32SSE4-NEXT: pinsrw $7, %eax, %xmm0 -; X32SSE4-NEXT: retl +; X32SSE-LABEL: elt5_v8i16: +; X32SSE: # BB#0: +; X32SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7> +; X32SSE-NEXT: pinsrw $5, {{[0-9]+}}(%esp), %xmm0 +; X32SSE-NEXT: retl ; -; X64SSE4-LABEL: elt5_v8i16: -; X64SSE4: # BB#0: -; X64SSE4-NEXT: movl $42, %eax -; X64SSE4-NEXT: movd %eax, %xmm0 -; X64SSE4-NEXT: movl $1, %eax -; X64SSE4-NEXT: pinsrw $1, %eax, %xmm0 -; X64SSE4-NEXT: movl $2, %eax -; X64SSE4-NEXT: pinsrw $2, %eax, %xmm0 -; X64SSE4-NEXT: movl $3, %eax -; X64SSE4-NEXT: pinsrw $3, %eax, %xmm0 -; X64SSE4-NEXT: movl $4, %eax -; X64SSE4-NEXT: pinsrw $4, %eax, %xmm0 -; X64SSE4-NEXT: pinsrw $5, %edi, %xmm0 -; X64SSE4-NEXT: movl $6, %eax -; X64SSE4-NEXT: pinsrw $6, %eax, %xmm0 -; X64SSE4-NEXT: movl $7, %eax -; X64SSE4-NEXT: pinsrw $7, %eax, %xmm0 -; X64SSE4-NEXT: retq +; X64SSE-LABEL: elt5_v8i16: +; X64SSE: # BB#0: +; X64SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7> +; X64SSE-NEXT: pinsrw $5, %edi, %xmm0 +; X64SSE-NEXT: retq ; ; X32AVX-LABEL: elt5_v8i16: ; X32AVX: # BB#0: -; X32AVX-NEXT: movl $42, %eax -; X32AVX-NEXT: vmovd %eax, %xmm0 -; 
X32AVX-NEXT: movl $1, %eax -; X32AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $2, %eax -; X32AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $3, %eax -; X32AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $4, %eax -; X32AVX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7> ; X32AVX-NEXT: vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X32AVX-NEXT: movl $6, %eax -; X32AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $7, %eax -; X32AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt5_v8i16: ; X64AVX: # BB#0: -; X64AVX-NEXT: movl $42, %eax -; X64AVX-NEXT: vmovd %eax, %xmm0 -; X64AVX-NEXT: movl $1, %eax -; X64AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $2, %eax -; X64AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $3, %eax -; X64AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $4, %eax -; X64AVX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,3,4,u,6,7> ; X64AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 -; X64AVX-NEXT: movl $6, %eax -; X64AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $7, %eax -; X64AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; X64AVX-NEXT: retq %ins = insertelement <8 x i16> , i16 %x, i32 5 ret <8 x i16> %ins @@ -387,73 +83,41 @@ define <4 x i32> @elt3_v4i32(i32 %x) { ; X32SSE2-LABEL: elt3_v4i32: ; X32SSE2: # BB#0: -; X32SSE2-NEXT: movl $2, %eax -; X32SSE2-NEXT: movd %eax, %xmm1 -; X32SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X32SSE2-NEXT: movl $1, %eax -; X32SSE2-NEXT: movd %eax, %xmm2 -; X32SSE2-NEXT: movl $42, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u> +; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; X32SSE2-NEXT: retl ; ; X64SSE2-LABEL: elt3_v4i32: ; X64SSE2: # BB#0: -; X64SSE2-NEXT: movd %edi, %xmm0 -; X64SSE2-NEXT: movl $2, %eax -; X64SSE2-NEXT: movd %eax, %xmm1 -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64SSE2-NEXT: movl $1, %eax -; X64SSE2-NEXT: movd %eax, %xmm2 -; X64SSE2-NEXT: movl $42, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64SSE2-NEXT: movd %edi, %xmm1 +; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = <42,1,2,u> +; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; X64SSE2-NEXT: retq ; ; X32SSE4-LABEL: elt3_v4i32: ; X32SSE4: # BB#0: -; X32SSE4-NEXT: movl $42, %eax -; X32SSE4-NEXT: movd %eax, %xmm0 -; X32SSE4-NEXT: movl $1, %eax -; X32SSE4-NEXT: pinsrd $1, %eax, %xmm0 -; X32SSE4-NEXT: movl $2, %eax -; X32SSE4-NEXT: pinsrd $2, %eax, %xmm0 +; X32SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u> ; X32SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0 ; X32SSE4-NEXT: retl ; ; X64SSE4-LABEL: elt3_v4i32: ; X64SSE4: # BB#0: -; X64SSE4-NEXT: movl $42, %eax -; X64SSE4-NEXT: movd %eax, %xmm0 -; X64SSE4-NEXT: movl $1, %eax -; X64SSE4-NEXT: pinsrd $1, %eax, %xmm0 -; X64SSE4-NEXT: movl $2, %eax -; X64SSE4-NEXT: pinsrd $2, %eax, %xmm0 +; 
X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = <42,1,2,u> ; X64SSE4-NEXT: pinsrd $3, %edi, %xmm0 ; X64SSE4-NEXT: retq ; ; X32AVX-LABEL: elt3_v4i32: ; X32AVX: # BB#0: -; X32AVX-NEXT: movl $42, %eax -; X32AVX-NEXT: vmovd %eax, %xmm0 -; X32AVX-NEXT: movl $1, %eax -; X32AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; X32AVX-NEXT: movl $2, %eax -; X32AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u> ; X32AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt3_v4i32: ; X64AVX: # BB#0: -; X64AVX-NEXT: movl $42, %eax -; X64AVX-NEXT: vmovd %eax, %xmm0 -; X64AVX-NEXT: movl $1, %eax -; X64AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; X64AVX-NEXT: movl $2, %eax -; X64AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <42,1,2,u> ; X64AVX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ; X64AVX-NEXT: retq %ins = insertelement <4 x i32> , i32 %x, i32 3 @@ -469,13 +133,18 @@ ; X32SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32SSE-NEXT: retl ; -; X64SSE-LABEL: elt0_v2i64: -; X64SSE: # BB#0: -; X64SSE-NEXT: movq %rdi, %xmm0 -; X64SSE-NEXT: movl $1, %eax -; X64SSE-NEXT: movq %rax, %xmm1 -; X64SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64SSE-NEXT: retq +; X64SSE2-LABEL: elt0_v2i64: +; X64SSE2: # BB#0: +; X64SSE2-NEXT: movq %rdi, %xmm1 +; X64SSE2-NEXT: movapd {{.*#+}} xmm0 = +; X64SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X64SSE2-NEXT: retq +; +; X64SSE4-LABEL: elt0_v2i64: +; X64SSE4: # BB#0: +; X64SSE4-NEXT: movdqa {{.*#+}} xmm0 = +; X64SSE4-NEXT: pinsrq $0, %rdi, %xmm0 +; X64SSE4-NEXT: retq ; ; X32AVX-LABEL: elt0_v2i64: ; X32AVX: # BB#0: @@ -487,10 +156,8 @@ ; ; X64AVX-LABEL: elt0_v2i64: ; X64AVX: # BB#0: -; X64AVX-NEXT: vmovq %rdi, %xmm0 -; X64AVX-NEXT: movl $1, %eax -; X64AVX-NEXT: vmovq %rax, %xmm1 -; X64AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = +; X64AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0 ; X64AVX-NEXT: retq %ins = insertelement <2 x i64> , i64 %x, i32 0 ret <2 x i64> %ins @@ -500,56 +167,41 @@ ; X32SSE2-LABEL: elt1_v4f32: ; X32SSE2: # BB#0: ; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X32SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <42,u,2,3> +; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] +; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] ; X32SSE2-NEXT: retl ; ; X64SSE2-LABEL: elt1_v4f32: ; X64SSE2: # BB#0: -; X64SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X64SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; X64SSE2-NEXT: movaps %xmm1, %xmm0 +; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <42,u,2,3> +; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] +; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] ; X64SSE2-NEXT: retq ; ; X32SSE4-LABEL: elt1_v4f32: ; X32SSE4: # BB#0: -; X32SSE4-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 
+; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = <42,u,2,3> ; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] -; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] -; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X32SSE4-NEXT: retl ; ; X64SSE4-LABEL: elt1_v4f32: ; X64SSE4: # BB#0: -; X64SSE4-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <42,u,2,3> ; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3] -; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] -; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] ; X64SSE4-NEXT: movaps %xmm1, %xmm0 ; X64SSE4-NEXT: retq ; ; X32AVX-LABEL: elt1_v4f32: ; X32AVX: # BB#0: -; X32AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32AVX-NEXT: vmovaps {{.*#+}} xmm0 = <42,u,2,3> ; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] -; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] -; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt1_v4f32: ; X64AVX: # BB#0: -; X64AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <42,u,2,3> ; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] -; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] -; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X64AVX-NEXT: retq %ins = insertelement <4 x float> , float %x, i32 1 ret <4 x float> %ins @@ -558,26 +210,26 @@ define <2 x double> @elt1_v2f64(double %x) { ; X32SSE-LABEL: elt1_v2f64: ; X32SSE: # BB#0: -; X32SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <42,u> ; X32SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; X32SSE-NEXT: retl ; ; X64SSE-LABEL: elt1_v2f64: ; X64SSE: # BB#0: -; X64SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64SSE-NEXT: movaps {{.*#+}} xmm1 = <42,u> ; X64SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64SSE-NEXT: movaps %xmm1, %xmm0 ; X64SSE-NEXT: retq ; ; X32AVX-LABEL: elt1_v2f64: ; X32AVX: # BB#0: -; X32AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X32AVX-NEXT: vmovapd {{.*#+}} xmm0 = <42,u> ; X32AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt1_v2f64: ; X64AVX: # BB#0: -; X64AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <42,u> ; X64AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; X64AVX-NEXT: retq %ins = insertelement <2 x double> , double %x, i32 1 @@ -587,80 +239,48 @@ define <8 x i32> @elt7_v8i32(i32 %x) { ; X32SSE2-LABEL: elt7_v8i32: ; X32SSE2: # BB#0: -; X32SSE2-NEXT: movl $6, %eax -; X32SSE2-NEXT: movd %eax, %xmm0 -; X32SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32SSE2-NEXT: movl $5, %eax -; X32SSE2-NEXT: movd %eax, %xmm2 -; X32SSE2-NEXT: movl $4, %eax -; X32SSE2-NEXT: movd %eax, %xmm1 -; X32SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X32SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u> +; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] +; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] ; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3] ; X32SSE2-NEXT: retl ; ; X64SSE2-LABEL: elt7_v8i32: ; X64SSE2: # BB#0: ; X64SSE2-NEXT: movd %edi, %xmm0 -; X64SSE2-NEXT: movl $6, %eax -; X64SSE2-NEXT: movd %eax, 
%xmm2 -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; X64SSE2-NEXT: movl $5, %eax -; X64SSE2-NEXT: movd %eax, %xmm0 -; X64SSE2-NEXT: movl $4, %eax -; X64SSE2-NEXT: movd %eax, %xmm1 -; X64SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,6,u> +; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] +; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] ; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3] ; X64SSE2-NEXT: retq ; ; X32SSE4-LABEL: elt7_v8i32: ; X32SSE4: # BB#0: -; X32SSE4-NEXT: movl $4, %eax -; X32SSE4-NEXT: movd %eax, %xmm1 -; X32SSE4-NEXT: movl $5, %eax -; X32SSE4-NEXT: pinsrd $1, %eax, %xmm1 -; X32SSE4-NEXT: movl $6, %eax -; X32SSE4-NEXT: pinsrd $2, %eax, %xmm1 +; X32SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u> ; X32SSE4-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm1 ; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3] ; X32SSE4-NEXT: retl ; ; X64SSE4-LABEL: elt7_v8i32: ; X64SSE4: # BB#0: -; X64SSE4-NEXT: movl $4, %eax -; X64SSE4-NEXT: movd %eax, %xmm1 -; X64SSE4-NEXT: movl $5, %eax -; X64SSE4-NEXT: pinsrd $1, %eax, %xmm1 -; X64SSE4-NEXT: movl $6, %eax -; X64SSE4-NEXT: pinsrd $2, %eax, %xmm1 +; X64SSE4-NEXT: movdqa {{.*#+}} xmm1 = <4,5,6,u> ; X64SSE4-NEXT: pinsrd $3, %edi, %xmm1 ; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3] ; X64SSE4-NEXT: retq ; ; X32AVX-LABEL: elt7_v8i32: ; X32AVX: # BB#0: -; X32AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [42,1,2,3] -; X32AVX-NEXT: movl $4, %eax -; X32AVX-NEXT: vmovd %eax, %xmm1 -; X32AVX-NEXT: movl $5, %eax -; X32AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; X32AVX-NEXT: movl $6, %eax -; X32AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X32AVX-NEXT: vmovdqa {{.*#+}} ymm0 = <42,1,2,3,4,5,6,u> +; X32AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X32AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 ; X32AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt7_v8i32: ; X64AVX: # BB#0: -; X64AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [42,1,2,3] -; X64AVX-NEXT: movl $4, %eax -; X64AVX-NEXT: vmovd %eax, %xmm1 -; X64AVX-NEXT: movl $5, %eax -; X64AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; X64AVX-NEXT: movl $6, %eax -; X64AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64AVX-NEXT: vmovdqa {{.*#+}} ymm0 = <42,1,2,3,4,5,6,u> +; X64AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64AVX-NEXT: vpinsrd $3, %edi, %xmm1, %xmm1 ; X64AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; X64AVX-NEXT: retq @@ -672,67 +292,47 @@ ; X32SSE2-LABEL: elt6_v8f32: ; X32SSE2: # BB#0: ; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X32SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7> +; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] +; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] ; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00] ; X32SSE2-NEXT: retl ; ; X64SSE2-LABEL: elt6_v8f32: ; X64SSE2: # BB#0: -; X64SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64SSE2-NEXT: movss {{.*#+}} xmm2 = 
mem[0],zero,zero,zero -; X64SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7> +; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] +; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] ; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00] ; X64SSE2-NEXT: retq ; ; X32SSE4-LABEL: elt6_v8f32: ; X32SSE4: # BB#0: -; X32SSE4-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; X32SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7> ; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] -; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] ; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00] ; X32SSE4-NEXT: retl ; ; X64SSE4-LABEL: elt6_v8f32: ; X64SSE4: # BB#0: -; X64SSE4-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7> ; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3] -; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] ; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00] ; X64SSE4-NEXT: retq ; ; X32AVX-LABEL: elt6_v8f32: ; X32AVX: # BB#0: -; X32AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] -; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] -; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] -; X32AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; X32AVX-NEXT: vmovaps {{.*#+}} ymm0 = <42,1,2,3,4,5,u,7> +; X32AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] -; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] ; X32AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt6_v8f32: ; X64AVX: # BB#0: -; X64AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] -; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] -; X64AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] -; X64AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] +; X64AVX-NEXT: vmovaps {{.*#+}} ymm1 = <42,1,2,3,4,5,u,7> +; X64AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 ; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3] -; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X64AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X64AVX-NEXT: retq %ins = insertelement <8 x float> , float %x, i32 6 @@ -751,16 +351,24 @@ ; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6,0,7,0] ; X32SSE-NEXT: retl ; -; X64SSE-LABEL: elt5_v8i64: -; X64SSE: # BB#0: -; X64SSE-NEXT: movq %rdi, %xmm0 -; X64SSE-NEXT: movl $4, %eax -; X64SSE-NEXT: movq %rax, %xmm2 -; X64SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; X64SSE-NEXT: movaps {{.*#+}} xmm0 = [42,1] -; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3] -; X64SSE-NEXT: movaps {{.*#+}} xmm3 = [6,7] -; X64SSE-NEXT: retq +; X64SSE2-LABEL: elt5_v8i64: +; X64SSE2: # BB#0: +; X64SSE2-NEXT: movq %rdi, %xmm0 +; X64SSE2-NEXT: movdqa {{.*#+}} xmm2 = <4,u> +; 
X64SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1] +; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3] +; X64SSE2-NEXT: movaps {{.*#+}} xmm3 = [6,7] +; X64SSE2-NEXT: retq +; +; X64SSE4-LABEL: elt5_v8i64: +; X64SSE4: # BB#0: +; X64SSE4-NEXT: movdqa {{.*#+}} xmm2 = <4,u> +; X64SSE4-NEXT: pinsrq $1, %rdi, %xmm2 +; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1] +; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = [2,3] +; X64SSE4-NEXT: movaps {{.*#+}} xmm3 = [6,7] +; X64SSE4-NEXT: retq ; ; X32AVX2-LABEL: elt5_v8i64: ; X32AVX2: # BB#0: @@ -774,11 +382,9 @@ ; ; X64AVX2-LABEL: elt5_v8i64: ; X64AVX2: # BB#0: -; X64AVX2-NEXT: vmovq %rdi, %xmm0 -; X64AVX2-NEXT: movl $4, %eax -; X64AVX2-NEXT: vmovq %rax, %xmm1 -; X64AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64AVX2-NEXT: vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm1 +; X64AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = <4,u,6,7> +; X64AVX2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1 +; X64AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; X64AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,1,2,3] ; X64AVX2-NEXT: retq ; @@ -795,13 +401,10 @@ ; ; X64AVX512F-LABEL: elt5_v8i64: ; X64AVX512F: # BB#0: -; X64AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [42,1,2,3] -; X64AVX512F-NEXT: vmovq %rdi, %xmm1 -; X64AVX512F-NEXT: movl $4, %eax -; X64AVX512F-NEXT: vmovq %rax, %xmm2 -; X64AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; X64AVX512F-NEXT: vinserti128 $1, {{.*}}(%rip), %ymm1, %ymm1 -; X64AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X64AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = <42,1,2,3,4,u,6,7> +; X64AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; X64AVX512F-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1 +; X64AVX512F-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 ; X64AVX512F-NEXT: retq %ins = insertelement <8 x i64> , i64 %x, i32 5 ret <8 x i64> %ins @@ -810,7 +413,7 @@ define <8 x double> @elt1_v8f64(double %x) { ; X32SSE-LABEL: elt1_v8f64: ; X32SSE: # BB#0: -; X32SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <42,u> ; X32SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2.000000e+00,3.000000e+00] ; X32SSE-NEXT: movaps {{.*#+}} xmm2 = [4.000000e+00,5.000000e+00] @@ -819,7 +422,7 @@ ; ; X64SSE-LABEL: elt1_v8f64: ; X64SSE: # BB#0: -; X64SSE-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero +; X64SSE-NEXT: movaps {{.*#+}} xmm4 = <42,u> ; X64SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0] ; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2.000000e+00,3.000000e+00] ; X64SSE-NEXT: movaps {{.*#+}} xmm2 = [4.000000e+00,5.000000e+00] @@ -829,52 +432,32 @@ ; ; X32AVX2-LABEL: elt1_v8f64: ; X32AVX2: # BB#0: -; X32AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32AVX2-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X32AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X32AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X32AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X32AVX2-NEXT: vmovapd {{.*#+}} ymm0 = <42,u,2,3> +; X32AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm0[0],mem[0] +; X32AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] ; X32AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] ; X32AVX2-NEXT: retl ; ; X64AVX2-LABEL: elt1_v8f64: ; X64AVX2: # BB#0: -; X64AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X64AVX2-NEXT: vmovapd {{.*#+}} ymm1 = <42,u,2,3> ; X64AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X64AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X64AVX2-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] ; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] ; X64AVX2-NEXT: retq ; ; X32AVX512F-LABEL: elt1_v8f64: ; X32AVX512F: # BB#0: -; X32AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X32AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X32AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X32AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X32AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0] -; X32AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; X32AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; X32AVX512F-NEXT: vmovapd {{.*#+}} zmm0 = <42,u,2,3,4,5,6,7> +; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm0[0],mem[0] +; X32AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; X32AVX512F-NEXT: retl ; ; X64AVX512F-LABEL: elt1_v8f64: ; X64AVX512F: # BB#0: -; X64AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X64AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X64AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X64AVX512F-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0] -; X64AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; X64AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X64AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; X64AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X64AVX512F-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0] -; X64AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; X64AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; X64AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <42,u,2,3,4,5,6,7> +; X64AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X64AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0 ; X64AVX512F-NEXT: retq %ins = insertelement <8 x double> , double %x, i32 1 ret <8 x double> %ins
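
For reference (not part of the patch): a minimal IR sketch of the build_vector pattern this change targets, mirroring the elt3_v4i32 test above. The function name and the value of the lane that is overwritten (the last i32 below) are assumptions for illustration; only the other constant lanes are visible in the CHECK lines.

; Hypothetical standalone example: one variable lane in an otherwise-constant
; <4 x i32>. With this patch, instead of materializing each constant in a GPR
; and inserting it lane by lane, the constants are loaded from the constant
; pool as a whole vector and the single variable element is inserted afterwards
; (e.g. on SSE4: movdqa of <42,1,2,u> followed by a single pinsrd of %x).
define <4 x i32> @insert_into_mostly_constant(i32 %x) {
  %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
  ret <4 x i32> %ins
}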