Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -464,7 +464,6 @@ SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); - SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, @@ -15854,77 +15853,6 @@ return DAG.getBitcast(VT, BV); } -SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { - EVT VT = N->getValueType(0); - - unsigned NumInScalars = N->getNumOperands(); - SDLoc DL(N); - - EVT SrcVT = MVT::Other; - unsigned Opcode = ISD::DELETED_NODE; - unsigned NumDefs = 0; - - for (unsigned i = 0; i != NumInScalars; ++i) { - SDValue In = N->getOperand(i); - unsigned Opc = In.getOpcode(); - - if (Opc == ISD::UNDEF) - continue; - - // If all scalar values are floats and converted from integers. - if (Opcode == ISD::DELETED_NODE && - (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { - Opcode = Opc; - } - - if (Opc != Opcode) - return SDValue(); - - EVT InVT = In.getOperand(0).getValueType(); - - // If all scalar values are typed differently, bail out. It's chosen to - // simplify BUILD_VECTOR of integer types. - if (SrcVT == MVT::Other) - SrcVT = InVT; - if (SrcVT != InVT) - return SDValue(); - NumDefs++; - } - - // If the vector has just one element defined, it's not worth to fold it into - // a vectorized one. - if (NumDefs < 2) - return SDValue(); - - assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) - && "Should only handle conversion from integer to float."); - assert(SrcVT != MVT::Other && "Cannot determine source type!"); - - EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); - - if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) - return SDValue(); - - // Just because the floating-point vector type is legal does not necessarily - // mean that the corresponding integer vector type is. - if (!isTypeLegal(NVT)) - return SDValue(); - - SmallVector Opnds; - for (unsigned i = 0; i != NumInScalars; ++i) { - SDValue In = N->getOperand(i); - - if (In.isUndef()) - Opnds.push_back(DAG.getUNDEF(SrcVT)); - else - Opnds.push_back(In.getOperand(0)); - } - SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); - AddToWorklist(BV.getNode()); - - return DAG.getNode(Opcode, DL, VT, BV); -} - SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, @@ -16371,9 +16299,6 @@ if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) - return V; - if (SDValue V = reduceBuildVecToShuffle(N)) return V; Index: llvm/trunk/test/CodeGen/ARM/vdup.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/vdup.ll +++ llvm/trunk/test/CodeGen/ARM/vdup.ll @@ -488,11 +488,12 @@ ; CHECK-LABEL: check_spr_splat2: ; CHECK: @ %bb.0: ; CHECK-NEXT: lsl r2, r2, #16 -; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: asr r2, r2, #16 -; CHECK-NEXT: vdup.32 d16, r2 -; CHECK-NEXT: vcvt.f32.s32 d16, d16 -; CHECK-NEXT: vsub.f32 d16, d16, d17 +; CHECK-NEXT: vmov s0, r2 +; CHECK-NEXT: vcvt.f32.s32 s0, s0 +; CHECK-NEXT: vdup.32 d17, d0[0] +; CHECK-NEXT: vsub.f32 d16, d17, d16 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: mov pc, lr %conv = sitofp i16 %q to float @@ -505,13 +506,13 @@ define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) { ; CHECK-LABEL: check_spr_splat4: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov r12, sp -; CHECK-NEXT: vmov d19, r2, r3 -; CHECK-NEXT: vld1.16 {d16[]}, [r12:16] -; CHECK-NEXT: vmov d18, r0, r1 -; CHECK-NEXT: vmovl.s16 q8, d16 -; CHECK-NEXT: vcvt.f32.s32 q8, q8 -; CHECK-NEXT: vsub.f32 q8, q8, q9 +; CHECK-NEXT: ldrsh r12, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov s0, r12 +; CHECK-NEXT: vcvt.f32.s32 s0, s0 +; CHECK-NEXT: vdup.32 q9, d0[0] +; CHECK-NEXT: vsub.f32 q8, q9, q8 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr @@ -525,13 +526,13 @@ define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) { ; CHECK-LABEL: check_spr_splat4_lane1: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov r12, sp -; CHECK-NEXT: vmov d19, r2, r3 -; CHECK-NEXT: vld1.16 {d16[]}, [r12:16] -; CHECK-NEXT: vmov d18, r0, r1 -; CHECK-NEXT: vmovl.s16 q8, d16 -; CHECK-NEXT: vcvt.f32.s32 q8, q8 -; CHECK-NEXT: vsub.f32 q8, q8, q9 +; CHECK-NEXT: ldrsh r12, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov s0, r12 +; CHECK-NEXT: vcvt.f32.s32 s0, s0 +; CHECK-NEXT: vdup.32 q9, d0[0] +; CHECK-NEXT: vsub.f32 q8, q9, q8 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr Index: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll +++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll @@ -6181,14 +6181,15 @@ ; MIPS32R5-NEXT: addiu $1, $zero, -16 ; MIPS32R5-NEXT: and $sp, $sp, $1 ; MIPS32R5-NEXT: andi $1, $6, 255 -; MIPS32R5-NEXT: sw $1, 36($sp) -; MIPS32R5-NEXT: sw $1, 32($sp) +; MIPS32R5-NEXT: mtc1 $1, $f0 +; MIPS32R5-NEXT: cvt.s.w $f0, $f0 +; MIPS32R5-NEXT: swc1 $f0, 36($sp) +; MIPS32R5-NEXT: swc1 $f0, 32($sp) ; MIPS32R5-NEXT: sw $5, 4($sp) ; MIPS32R5-NEXT: sw $4, 0($sp) -; MIPS32R5-NEXT: ld.w $w0, 32($sp) -; MIPS32R5-NEXT: ffint_s.w $w0, $w0 -; MIPS32R5-NEXT: ld.w $w1, 0($sp) -; MIPS32R5-NEXT: fadd.w $w0, $w0, $w1 +; MIPS32R5-NEXT: ld.w $w0, 0($sp) +; MIPS32R5-NEXT: ld.w $w1, 32($sp) +; MIPS32R5-NEXT: fadd.w $w0, $w1, $w0 ; MIPS32R5-NEXT: lw $1, 84($fp) ; MIPS32R5-NEXT: sw $1, 20($sp) ; MIPS32R5-NEXT: lw $1, 80($fp) @@ -6209,13 +6210,14 @@ ; MIPS64R5-NEXT: .cfi_def_cfa_offset 48 ; MIPS64R5-NEXT: sll $1, $5, 0 ; MIPS64R5-NEXT: andi $1, $1, 255 -; MIPS64R5-NEXT: sw $1, 36($sp) -; MIPS64R5-NEXT: sw $1, 32($sp) +; MIPS64R5-NEXT: mtc1 $1, $f0 +; MIPS64R5-NEXT: cvt.s.w $f0, $f0 +; MIPS64R5-NEXT: swc1 $f0, 36($sp) +; MIPS64R5-NEXT: swc1 $f0, 32($sp) ; MIPS64R5-NEXT: sd $4, 0($sp) -; MIPS64R5-NEXT: ld.w $w0, 32($sp) -; MIPS64R5-NEXT: ffint_s.w $w0, $w0 -; MIPS64R5-NEXT: ld.w $w1, 0($sp) -; MIPS64R5-NEXT: fadd.w $w0, $w0, $w1 +; MIPS64R5-NEXT: ld.w $w0, 0($sp) +; MIPS64R5-NEXT: ld.w $w1, 32($sp) +; MIPS64R5-NEXT: fadd.w $w0, $w1, $w0 ; MIPS64R5-NEXT: sd $6, 16($sp) ; MIPS64R5-NEXT: ld.w $w1, 16($sp) ; MIPS64R5-NEXT: fadd.w $w0, $w0, $w1 @@ -6337,36 +6339,59 @@ ; MIPS64EB-NEXT: jr $ra ; MIPS64EB-NEXT: nop ; -; MIPS32R5-LABEL: mixed_32: -; MIPS32R5: # %bb.0: # %entry -; MIPS32R5-NEXT: ldi.b $w0, 0 -; MIPS32R5-NEXT: insert.w $w0[0], $6 -; MIPS32R5-NEXT: insert.w $w0[1], $7 -; MIPS32R5-NEXT: lw $1, 16($sp) -; MIPS32R5-NEXT: insert.w $w0[2], $1 -; MIPS32R5-NEXT: lw $1, 20($sp) -; MIPS32R5-NEXT: insert.w $w0[3], $1 -; MIPS32R5-NEXT: lw $1, 24($sp) -; MIPS32R5-NEXT: fill.w $w1, $1 -; MIPS32R5-NEXT: ffint_u.w $w1, $w1 -; MIPS32R5-NEXT: fadd.w $w0, $w1, $w0 -; MIPS32R5-NEXT: st.w $w0, 0($4) -; MIPS32R5-NEXT: jr $ra -; MIPS32R5-NEXT: nop +; MIPS32R5EB-LABEL: mixed_32: +; MIPS32R5EB: # %bb.0: # %entry +; MIPS32R5EB-NEXT: addiu $sp, $sp, -8 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R5EB-NEXT: lui $1, 17200 +; MIPS32R5EB-NEXT: sw $1, 0($sp) +; MIPS32R5EB-NEXT: lw $1, 32($sp) +; MIPS32R5EB-NEXT: sw $1, 4($sp) +; MIPS32R5EB-NEXT: lui $1, %hi($CPI41_0) +; MIPS32R5EB-NEXT: ldc1 $f0, %lo($CPI41_0)($1) +; MIPS32R5EB-NEXT: ldc1 $f1, 0($sp) +; MIPS32R5EB-NEXT: sub.d $f0, $f1, $f0 +; MIPS32R5EB-NEXT: cvt.s.d $f0, $f0 +; MIPS32R5EB-NEXT: ldi.b $w1, 0 +; MIPS32R5EB-NEXT: splati.w $w0, $w0[0] +; MIPS32R5EB-NEXT: insert.w $w1[0], $6 +; MIPS32R5EB-NEXT: insert.w $w1[1], $7 +; MIPS32R5EB-NEXT: lw $1, 24($sp) +; MIPS32R5EB-NEXT: insert.w $w1[2], $1 +; MIPS32R5EB-NEXT: lw $1, 28($sp) +; MIPS32R5EB-NEXT: insert.w $w1[3], $1 +; MIPS32R5EB-NEXT: fadd.w $w0, $w0, $w1 +; MIPS32R5EB-NEXT: st.w $w0, 0($4) +; MIPS32R5EB-NEXT: addiu $sp, $sp, 8 +; MIPS32R5EB-NEXT: jr $ra +; MIPS32R5EB-NEXT: nop ; ; MIPS64R5EB-LABEL: mixed_32: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: ldi.b $w0, 0 -; MIPS64R5EB-NEXT: insert.d $w0[0], $4 -; MIPS64R5EB-NEXT: insert.d $w0[1], $5 -; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS64R5EB-NEXT: sll $1, $6, 0 -; MIPS64R5EB-NEXT: fill.w $w1, $1 -; MIPS64R5EB-NEXT: ffint_u.w $w1, $w1 -; MIPS64R5EB-NEXT: fadd.w $w0, $w1, $w0 +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(mixed_32))) +; MIPS64R5EB-NEXT: daddu $1, $1, $25 +; MIPS64R5EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32))) +; MIPS64R5EB-NEXT: lui $2, 17200 +; MIPS64R5EB-NEXT: sw $2, 8($sp) +; MIPS64R5EB-NEXT: sll $2, $6, 0 +; MIPS64R5EB-NEXT: sw $2, 12($sp) +; MIPS64R5EB-NEXT: ld $1, %got_page(.LCPI41_0)($1) +; MIPS64R5EB-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1) +; MIPS64R5EB-NEXT: ldc1 $f1, 8($sp) +; MIPS64R5EB-NEXT: sub.d $f0, $f1, $f0 +; MIPS64R5EB-NEXT: ldi.b $w1, 0 +; MIPS64R5EB-NEXT: insert.d $w1[0], $4 +; MIPS64R5EB-NEXT: insert.d $w1[1], $5 +; MIPS64R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS64R5EB-NEXT: cvt.s.d $f0, $f0 +; MIPS64R5EB-NEXT: splati.w $w0, $w0[0] +; MIPS64R5EB-NEXT: fadd.w $w0, $w0, $w1 ; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177 ; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0] ; MIPS64R5EB-NEXT: copy_s.d $3, $w0[1] +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -6445,17 +6470,57 @@ ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop ; +; MIPS32R5EL-LABEL: mixed_32: +; MIPS32R5EL: # %bb.0: # %entry +; MIPS32R5EL-NEXT: addiu $sp, $sp, -8 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R5EL-NEXT: lui $1, 17200 +; MIPS32R5EL-NEXT: sw $1, 4($sp) +; MIPS32R5EL-NEXT: lw $1, 32($sp) +; MIPS32R5EL-NEXT: sw $1, 0($sp) +; MIPS32R5EL-NEXT: lui $1, %hi($CPI41_0) +; MIPS32R5EL-NEXT: ldc1 $f0, %lo($CPI41_0)($1) +; MIPS32R5EL-NEXT: ldc1 $f1, 0($sp) +; MIPS32R5EL-NEXT: sub.d $f0, $f1, $f0 +; MIPS32R5EL-NEXT: cvt.s.d $f0, $f0 +; MIPS32R5EL-NEXT: ldi.b $w1, 0 +; MIPS32R5EL-NEXT: splati.w $w0, $w0[0] +; MIPS32R5EL-NEXT: insert.w $w1[0], $6 +; MIPS32R5EL-NEXT: insert.w $w1[1], $7 +; MIPS32R5EL-NEXT: lw $1, 24($sp) +; MIPS32R5EL-NEXT: insert.w $w1[2], $1 +; MIPS32R5EL-NEXT: lw $1, 28($sp) +; MIPS32R5EL-NEXT: insert.w $w1[3], $1 +; MIPS32R5EL-NEXT: fadd.w $w0, $w0, $w1 +; MIPS32R5EL-NEXT: st.w $w0, 0($4) +; MIPS32R5EL-NEXT: addiu $sp, $sp, 8 +; MIPS32R5EL-NEXT: jr $ra +; MIPS32R5EL-NEXT: nop +; ; MIPS64R5EL-LABEL: mixed_32: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: ldi.b $w0, 0 -; MIPS64R5EL-NEXT: insert.d $w0[0], $4 -; MIPS64R5EL-NEXT: insert.d $w0[1], $5 -; MIPS64R5EL-NEXT: sll $1, $6, 0 -; MIPS64R5EL-NEXT: fill.w $w1, $1 -; MIPS64R5EL-NEXT: ffint_u.w $w1, $w1 -; MIPS64R5EL-NEXT: fadd.w $w0, $w1, $w0 +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(mixed_32))) +; MIPS64R5EL-NEXT: daddu $1, $1, $25 +; MIPS64R5EL-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32))) +; MIPS64R5EL-NEXT: lui $2, 17200 +; MIPS64R5EL-NEXT: sw $2, 12($sp) +; MIPS64R5EL-NEXT: sll $2, $6, 0 +; MIPS64R5EL-NEXT: sw $2, 8($sp) +; MIPS64R5EL-NEXT: ld $1, %got_page(.LCPI41_0)($1) +; MIPS64R5EL-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1) +; MIPS64R5EL-NEXT: ldc1 $f1, 8($sp) +; MIPS64R5EL-NEXT: sub.d $f0, $f1, $f0 +; MIPS64R5EL-NEXT: ldi.b $w1, 0 +; MIPS64R5EL-NEXT: insert.d $w1[0], $4 +; MIPS64R5EL-NEXT: insert.d $w1[1], $5 +; MIPS64R5EL-NEXT: cvt.s.d $f0, $f0 +; MIPS64R5EL-NEXT: splati.w $w0, $w0[0] +; MIPS64R5EL-NEXT: fadd.w $w0, $w0, $w1 ; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0] ; MIPS64R5EL-NEXT: copy_s.d $3, $w0[1] +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: Index: llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts -; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "9 machinelicm" +; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "6 machinelicm" ; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 @@ -24,15 +24,17 @@ ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_3: ## %bb.i ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: pinsrd $1, 4, %xmm0 -; CHECK-NEXT: pinsrd $2, 8, %xmm0 -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2],mem[3],xmm1[4],mem[5],xmm1[6],mem[7] -; CHECK-NEXT: psrld $16, %xmm0 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; CHECK-NEXT: addps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm0 +; CHECK-NEXT: movl 4, %eax +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm1 +; CHECK-NEXT: movl 8, %eax +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm2 +; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; CHECK-NEXT: movaps %xmm0, 0 ; CHECK-NEXT: LBB0_1: ## %bb4 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 Index: llvm/trunk/test/CodeGen/X86/cvtv2f32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/cvtv2f32.ll +++ llvm/trunk/test/CodeGen/X86/cvtv2f32.ll @@ -8,26 +8,27 @@ define <2 x float> @uitofp_2i32_cvt_buildvector(i32 %x, i32 %y, <2 x float> %v) { ; X32-LABEL: uitofp_2i32_cvt_buildvector: ; X32: # %bb.0: -; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200] -; X32-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; X32-NEXT: psrld $16, %xmm1 -; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2],mem[3],xmm1[4],mem[5],xmm1[6],mem[7] -; X32-NEXT: addps {{\.LCPI.*}}, %xmm1 -; X32-NEXT: addps %xmm2, %xmm1 +; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; X32-NEXT: orpd %xmm2, %xmm1 +; X32-NEXT: subsd %xmm2, %xmm1 +; X32-NEXT: cvtsd2ss %xmm1, %xmm1 +; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X32-NEXT: orpd %xmm2, %xmm3 +; X32-NEXT: subsd %xmm2, %xmm3 +; X32-NEXT: xorps %xmm2, %xmm2 +; X32-NEXT: cvtsd2ss %xmm3, %xmm2 +; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; X32-NEXT: mulps %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: uitofp_2i32_cvt_buildvector: ; X64: # %bb.0: -; X64-NEXT: movd %edi, %xmm1 -; X64-NEXT: pinsrd $1, %esi, %xmm1 -; X64-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200] -; X64-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; X64-NEXT: psrld $16, %xmm1 -; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2],mem[3],xmm1[4],mem[5],xmm1[6],mem[7] -; X64-NEXT: addps {{.*}}(%rip), %xmm1 -; X64-NEXT: addps %xmm2, %xmm1 +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2ssq %rax, %xmm1 +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cvtsi2ssq %rax, %xmm2 +; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; X64-NEXT: mulps %xmm1, %xmm0 ; X64-NEXT: retq %t1 = uitofp i32 %x to float