Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -820,19 +820,6 @@ Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable; } -// TODO: This is largely to trick fastisel into ignoring the pattern. -def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2), - (X86Unpckh node:$src1, node:$src2), [{ - return N->getOperand(0) == N->getOperand(1); -}]>; - -let Predicates = [UseSSE2] in { - // TODO: This is a hack pattern to allow lowering to emit unpckh instead of - // movhlps for sse2 without changing a bunch of tests. - def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)), - (MOVHLPSrr VR128:$src, VR128:$src)>; -} - //===----------------------------------------------------------------------===// // SSE 1 & 2 - Conversion Instructions //===----------------------------------------------------------------------===// Index: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll +++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll @@ -38,7 +38,7 @@ ; SSE2-LABEL: test_negative_zero_1: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero Index: llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll +++ llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll @@ -197,7 +197,7 @@ ; SSE-NEXT: cvtss2sd %xmm2, %xmm4 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] ; SSE-NEXT: movaps %xmm2, %xmm6 -; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1],xmm2[1] ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3] ; SSE-NEXT: movaps {{.*#+}} xmm7 ; SSE-NEXT: movaps %xmm0, %xmm2 @@ -213,7 +213,7 @@ ; SSE-NEXT: orps %xmm0, %xmm4 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0] ; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: andps %xmm7, %xmm0 ; SSE-NEXT: cvtss2sd %xmm3, %xmm3 ; SSE-NEXT: andps %xmm8, %xmm3 @@ -260,7 +260,7 @@ ; SSE-NEXT: orps %xmm6, %xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE-NEXT: movaps %xmm3, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE-NEXT: andps %xmm5, %xmm1 ; SSE-NEXT: xorps %xmm6, %xmm6 ; SSE-NEXT: cvtsd2ss %xmm2, %xmm6 Index: llvm/trunk/test/CodeGen/X86/complex-fastmath.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/complex-fastmath.ll +++ llvm/trunk/test/CodeGen/X86/complex-fastmath.ll @@ -57,9 +57,9 @@ define <2 x double> @complex_square_f64(<2 x double>) #0 { ; SSE-LABEL: complex_square_f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] -; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: movapd %xmm0, %xmm2 ; SSE-NEXT: addsd %xmm0, %xmm2 ; SSE-NEXT: mulsd %xmm1, %xmm2 ; SSE-NEXT: mulsd %xmm0, %xmm0 @@ -160,11 +160,11 @@ define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 { ; SSE-LABEL: complex_mul_f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] -; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] -; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE-NEXT: movapd %xmm1, %xmm3 +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] +; SSE-NEXT: movapd %xmm3, %xmm4 ; SSE-NEXT: mulsd %xmm0, %xmm4 ; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: mulsd %xmm2, %xmm1 Index: llvm/trunk/test/CodeGen/X86/ftrunc.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/ftrunc.ll +++ llvm/trunk/test/CodeGen/X86/ftrunc.ll @@ -67,7 +67,7 @@ ; SSE2-NEXT: cvttss2si %xmm1, %rax ; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: cvttss2si %xmm2, %rax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -105,10 +105,10 @@ define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 { ; SSE2-LABEL: trunc_unsigned_v2f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm3 ; SSE2-NEXT: subsd %xmm2, %xmm3 ; SSE2-NEXT: cvttsd2si %xmm3, %rax ; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 @@ -116,7 +116,7 @@ ; SSE2-NEXT: cvttsd2si %xmm1, %rdx ; SSE2-NEXT: ucomisd %xmm2, %xmm1 ; SSE2-NEXT: cmovaeq %rax, %rdx -; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm0, %xmm1 ; SSE2-NEXT: subsd %xmm2, %xmm1 ; SSE2-NEXT: cvttsd2si %xmm1, %rax ; SSE2-NEXT: xorq %rcx, %rax @@ -155,10 +155,10 @@ define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 { ; SSE2-LABEL: trunc_unsigned_v4f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; SSE2-NEXT: movaps %xmm3, %xmm4 +; SSE2-NEXT: movapd %xmm3, %xmm4 ; SSE2-NEXT: subsd %xmm2, %xmm4 ; SSE2-NEXT: cvttsd2si %xmm4, %rcx ; SSE2-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000 @@ -166,23 +166,23 @@ ; SSE2-NEXT: cvttsd2si %xmm3, %rax ; SSE2-NEXT: ucomisd %xmm2, %xmm3 ; SSE2-NEXT: cmovaeq %rcx, %rax -; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm3 ; SSE2-NEXT: subsd %xmm2, %xmm3 ; SSE2-NEXT: cvttsd2si %xmm3, %rsi ; SSE2-NEXT: xorq %rdx, %rsi ; SSE2-NEXT: cvttsd2si %xmm1, %rcx ; SSE2-NEXT: ucomisd %xmm2, %xmm1 ; SSE2-NEXT: cmovaeq %rsi, %rcx -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] -; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2-NEXT: movapd %xmm1, %xmm3 ; SSE2-NEXT: subsd %xmm2, %xmm3 ; SSE2-NEXT: cvttsd2si %xmm3, %rsi ; SSE2-NEXT: xorq %rdx, %rsi ; SSE2-NEXT: cvttsd2si %xmm1, %rdi ; SSE2-NEXT: ucomisd %xmm2, %xmm1 ; SSE2-NEXT: cmovaeq %rsi, %rdi -; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm0, %xmm1 ; SSE2-NEXT: subsd %xmm2, %xmm1 ; SSE2-NEXT: cvttsd2si %xmm1, %rsi ; SSE2-NEXT: xorq %rdx, %rsi Index: llvm/trunk/test/CodeGen/X86/haddsub-2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/haddsub-2.ll +++ llvm/trunk/test/CodeGen/X86/haddsub-2.ll @@ -902,7 +902,7 @@ ; SSE-LABEL: not_a_hsub_2: ; SSE: # %bb.0: ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: movaps %xmm0, %xmm3 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] ; SSE-NEXT: subss %xmm3, %xmm2 @@ -912,7 +912,7 @@ ; SSE-NEXT: movaps %xmm1, %xmm2 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3] ; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE-NEXT: subss %xmm3, %xmm2 ; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] ; SSE-NEXT: subss %xmm3, %xmm1 @@ -958,11 +958,11 @@ define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) { ; SSE-LABEL: not_a_hsub_3: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE-NEXT: movapd %xmm1, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE-NEXT: subsd %xmm2, %xmm1 -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: subsd %xmm0, %xmm2 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movapd %xmm2, %xmm0 Index: llvm/trunk/test/CodeGen/X86/haddsub-3.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/haddsub-3.ll +++ llvm/trunk/test/CodeGen/X86/haddsub-3.ll @@ -10,7 +10,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -19,7 +19,7 @@ ; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSSE3-NEXT: addps %xmm0, %xmm1 ; SSSE3-NEXT: movaps %xmm1, %xmm0 -; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSSE3-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSSE3-NEXT: addss %xmm1, %xmm0 ; SSSE3-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/haddsub-undef.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/haddsub-undef.ll +++ llvm/trunk/test/CodeGen/X86/haddsub-undef.ll @@ -102,8 +102,8 @@ define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: test5_undef: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: addsd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -168,7 +168,7 @@ ; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE-NEXT: addss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: addss %xmm2, %xmm0 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] Index: llvm/trunk/test/CodeGen/X86/pr11334.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr11334.ll +++ llvm/trunk/test/CodeGen/X86/pr11334.ll @@ -25,7 +25,7 @@ ; SSE-NEXT: cvtps2pd %xmm0, %xmm0 ; SSE-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm2, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE-NEXT: fldl -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll +++ llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll @@ -432,9 +432,9 @@ ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: subss %xmm3, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm4 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE-NEXT: movaps %xmm1, %xmm5 -; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE-NEXT: subss %xmm5, %xmm4 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] ; SSE-NEXT: addss %xmm3, %xmm5 Index: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll +++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll @@ -1575,7 +1575,7 @@ ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -1858,7 +1858,7 @@ ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movaps %xmm2, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm2[1],xmm3[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] @@ -1874,7 +1874,7 @@ ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] Index: llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -88,7 +88,7 @@ ; NO-FMA-NEXT: divsd {{.*}}(%rip), %xmm1 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -473,7 +473,7 @@ ; NO-FMA-NEXT: mulsd {{.*}}(%rip), %xmm1 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -604,7 +604,7 @@ ; NO-FMA-NEXT: addsd {{.*}}(%rip), %xmm1 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -737,7 +737,7 @@ ; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -1210,7 +1210,7 @@ ; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll @@ -41,7 +41,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -51,7 +51,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: haddps %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -78,7 +78,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: addps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -89,7 +89,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: addps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: haddps %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -126,7 +126,7 @@ ; SSE2-NEXT: addps %xmm3, %xmm1 ; SSE2-NEXT: addps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -139,7 +139,7 @@ ; SSE41-NEXT: addps %xmm3, %xmm1 ; SSE41-NEXT: addps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: haddps %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -208,7 +208,7 @@ ; SSE2-LABEL: test_v4f32_zero: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -218,7 +218,7 @@ ; SSE41-LABEL: test_v4f32_zero: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -246,7 +246,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -257,7 +257,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -295,7 +295,7 @@ ; SSE2-NEXT: addps %xmm2, %xmm0 ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -308,7 +308,7 @@ ; SSE41-NEXT: addps %xmm2, %xmm0 ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -378,7 +378,7 @@ ; SSE2-LABEL: test_v4f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -388,7 +388,7 @@ ; SSE41-LABEL: test_v4f32_undef: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -416,7 +416,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -427,7 +427,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -465,7 +465,7 @@ ; SSE2-NEXT: addps %xmm2, %xmm0 ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -478,7 +478,7 @@ ; SSE41-NEXT: addps %xmm2, %xmm0 ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -520,8 +520,8 @@ define double @test_v2f64(double %a0, <2 x double> %a1) { ; SSE2-LABEL: test_v2f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -549,7 +549,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: addpd %xmm2, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -588,7 +588,7 @@ ; SSE2-NEXT: addpd %xmm3, %xmm1 ; SSE2-NEXT: addpd %xmm2, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -637,7 +637,7 @@ ; SSE2-NEXT: addpd %xmm2, %xmm4 ; SSE2-NEXT: addpd %xmm1, %xmm4 ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1] ; SSE2-NEXT: addpd %xmm4, %xmm0 ; SSE2-NEXT: retq ; @@ -689,8 +689,8 @@ define double @test_v2f64_zero(<2 x double> %a0) { ; SSE2-LABEL: test_v2f64_zero: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -718,7 +718,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -757,7 +757,7 @@ ; SSE2-NEXT: addpd %xmm2, %xmm0 ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -806,7 +806,7 @@ ; SSE2-NEXT: addpd %xmm3, %xmm1 ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -858,8 +858,8 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; SSE2-LABEL: test_v2f64_undef: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -887,7 +887,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -926,7 +926,7 @@ ; SSE2-NEXT: addpd %xmm2, %xmm0 ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -975,7 +975,7 @@ ; SSE2-NEXT: addpd %xmm3, %xmm1 ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll @@ -50,7 +50,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -62,7 +62,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -101,7 +101,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE2-NEXT: addss %xmm3, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -110,7 +110,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -122,7 +122,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm3, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE41-NEXT: addss %xmm3, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -130,7 +130,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -187,7 +187,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm5, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE2-NEXT: addss %xmm5, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -196,7 +196,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -205,7 +205,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -214,7 +214,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm4, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 @@ -226,7 +226,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm5, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE41-NEXT: addss %xmm5, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -234,7 +234,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -242,7 +242,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -250,7 +250,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm4, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE41-NEXT: addss %xmm4, %xmm0 @@ -379,7 +379,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: addss %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addss %xmm2, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -392,7 +392,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addss %xmm2, %xmm1 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -434,7 +434,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] ; SSE2-NEXT: addss %xmm2, %xmm3 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: addss %xmm3, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -443,7 +443,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -456,7 +456,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm3 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: addss %xmm3, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -464,7 +464,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -524,7 +524,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3] ; SSE2-NEXT: addss %xmm4, %xmm5 ; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE2-NEXT: addss %xmm5, %xmm4 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 @@ -533,7 +533,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -542,7 +542,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -551,7 +551,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -564,7 +564,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm5 ; SSE41-NEXT: movaps %xmm0, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE41-NEXT: addss %xmm5, %xmm4 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm4, %xmm0 @@ -572,7 +572,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -580,7 +580,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -588,7 +588,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -709,7 +709,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: addss %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -720,7 +720,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: addss %xmm1, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -756,7 +756,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE2-NEXT: addss %xmm2, %xmm3 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -765,7 +765,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -776,7 +776,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE41-NEXT: addss %xmm2, %xmm3 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -784,7 +784,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -838,7 +838,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE2-NEXT: movaps %xmm0, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE2-NEXT: addss %xmm4, %xmm5 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm5, %xmm0 @@ -847,7 +847,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -856,7 +856,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -865,7 +865,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -876,7 +876,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE41-NEXT: movaps %xmm0, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE41-NEXT: addss %xmm4, %xmm5 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm5, %xmm0 @@ -884,7 +884,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -892,7 +892,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -900,7 +900,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmax-nnan.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: maxps %xmm2, %xmm0 ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: maxps %xmm2, %xmm0 ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: maxpd %xmm2, %xmm0 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: maxpd %xmm3, %xmm1 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmax.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fmax.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmax.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: maxps %xmm2, %xmm0 ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: maxps %xmm2, %xmm0 ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: maxpd %xmm2, %xmm0 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: maxpd %xmm3, %xmm1 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmin-nnan.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: minps %xmm2, %xmm0 ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: minps %xmm2, %xmm0 ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: minpd %xmm2, %xmm0 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: minpd %xmm3, %xmm1 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmin.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fmin.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmin.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: minps %xmm2, %xmm0 ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: minps %xmm2, %xmm0 ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: minpd %xmm2, %xmm0 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: minpd %xmm3, %xmm1 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmul-fast.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulps %xmm1, %xmm2 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3] ; SSE41-NEXT: mulps %xmm2, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: mulps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: mulps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulps %xmm1, %xmm2 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3] ; SSE41-NEXT: mulps %xmm2, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: mulps %xmm3, %xmm1 ; SSE2-NEXT: mulps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: mulps %xmm3, %xmm1 ; SSE41-NEXT: mulps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulps %xmm1, %xmm2 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3] ; SSE41-NEXT: mulps %xmm2, %xmm0 @@ -221,7 +221,7 @@ ; SSE2-LABEL: test_v4f32_zero: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -231,7 +231,7 @@ ; SSE41-LABEL: test_v4f32_zero: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -262,7 +262,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -273,7 +273,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -314,7 +314,7 @@ ; SSE2-NEXT: mulps %xmm2, %xmm0 ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -327,7 +327,7 @@ ; SSE41-NEXT: mulps %xmm2, %xmm0 ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -402,7 +402,7 @@ ; SSE2-LABEL: test_v4f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -412,7 +412,7 @@ ; SSE41-LABEL: test_v4f32_undef: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -443,7 +443,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -454,7 +454,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -495,7 +495,7 @@ ; SSE2-NEXT: mulps %xmm2, %xmm0 ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -508,7 +508,7 @@ ; SSE41-NEXT: mulps %xmm2, %xmm0 ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -552,8 +552,8 @@ define double @test_v2f64(double %a0, <2 x double> %a1) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -577,7 +577,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: mulpd %xmm2, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -611,7 +611,7 @@ ; SSE-NEXT: mulpd %xmm3, %xmm1 ; SSE-NEXT: mulpd %xmm2, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -652,7 +652,7 @@ ; SSE-NEXT: mulpd %xmm2, %xmm4 ; SSE-NEXT: mulpd %xmm1, %xmm4 ; SSE-NEXT: movapd %xmm4, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1] ; SSE-NEXT: mulpd %xmm4, %xmm0 ; SSE-NEXT: retq ; @@ -692,8 +692,8 @@ define double @test_v2f64_zero(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_zero: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -718,7 +718,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -753,7 +753,7 @@ ; SSE-NEXT: mulpd %xmm2, %xmm0 ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -795,7 +795,7 @@ ; SSE-NEXT: mulpd %xmm3, %xmm1 ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -835,8 +835,8 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -861,7 +861,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -896,7 +896,7 @@ ; SSE-NEXT: mulpd %xmm2, %xmm0 ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -938,7 +938,7 @@ ; SSE-NEXT: mulpd %xmm3, %xmm1 ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll @@ -50,7 +50,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -62,7 +62,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -101,7 +101,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE2-NEXT: mulss %xmm3, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -110,7 +110,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -122,7 +122,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE41-NEXT: mulss %xmm3, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -130,7 +130,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -187,7 +187,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm5, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE2-NEXT: mulss %xmm5, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -196,7 +196,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -205,7 +205,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 @@ -214,7 +214,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm4, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE2-NEXT: mulss %xmm4, %xmm0 @@ -226,7 +226,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm5, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE41-NEXT: mulss %xmm5, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -234,7 +234,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -242,7 +242,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 @@ -250,7 +250,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm4, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE41-NEXT: mulss %xmm4, %xmm0 @@ -371,7 +371,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] ; SSE2-NEXT: mulss %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: mulss %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -382,7 +382,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss %xmm0, %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: mulss %xmm1, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -418,7 +418,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: mulss %xmm0, %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE2-NEXT: mulss %xmm2, %xmm3 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 @@ -427,7 +427,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -438,7 +438,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss %xmm0, %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE41-NEXT: mulss %xmm2, %xmm3 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 @@ -446,7 +446,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -500,7 +500,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] ; SSE2-NEXT: mulss %xmm0, %xmm4 ; SSE2-NEXT: movaps %xmm0, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE2-NEXT: mulss %xmm4, %xmm5 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm5, %xmm0 @@ -509,7 +509,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm4, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE2-NEXT: mulss %xmm4, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -518,7 +518,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -527,7 +527,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 @@ -538,7 +538,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss %xmm0, %xmm4 ; SSE41-NEXT: movaps %xmm0, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE41-NEXT: mulss %xmm4, %xmm5 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm5, %xmm0 @@ -546,7 +546,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: mulss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -554,7 +554,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -562,7 +562,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 @@ -679,7 +679,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] ; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: mulss %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -690,7 +690,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: mulss %xmm1, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -726,7 +726,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE2-NEXT: mulss %xmm2, %xmm3 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 @@ -735,7 +735,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -746,7 +746,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE41-NEXT: mulss %xmm2, %xmm3 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 @@ -754,7 +754,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -808,7 +808,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] ; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4 ; SSE2-NEXT: movaps %xmm0, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE2-NEXT: mulss %xmm4, %xmm5 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm5, %xmm0 @@ -817,7 +817,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm4, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE2-NEXT: mulss %xmm4, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -826,7 +826,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -835,7 +835,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 @@ -846,7 +846,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4 ; SSE41-NEXT: movaps %xmm0, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE41-NEXT: mulss %xmm4, %xmm5 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm5, %xmm0 @@ -854,7 +854,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: mulss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -862,7 +862,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -870,7 +870,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 @@ -1182,8 +1182,8 @@ define double @test_v2f64_one(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_one: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulsd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1206,8 +1206,8 @@ define double @test_v4f64_one(<4 x double> %a0) { ; SSE-LABEL: test_v4f64_one: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: mulsd %xmm0, %xmm2 ; SSE-NEXT: mulsd %xmm1, %xmm2 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] @@ -1243,8 +1243,8 @@ define double @test_v8f64_one(<8 x double> %a0) { ; SSE-LABEL: test_v8f64_one: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm4 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE-NEXT: movapd %xmm0, %xmm4 +; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE-NEXT: mulsd %xmm0, %xmm4 ; SSE-NEXT: mulsd %xmm1, %xmm4 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] Index: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll +++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll @@ -74,7 +74,7 @@ ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X86-SSE2-NEXT: movss %xmm0, (%eax) ; X86-SSE2-NEXT: movaps %xmm0, %xmm1 -; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; X86-SSE2-NEXT: movss %xmm1, 8(%eax) ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; X86-SSE2-NEXT: movss %xmm0, 4(%eax) Index: llvm/trunk/test/CodeGen/X86/widen_conv-4.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_conv-4.ll +++ llvm/trunk/test/CodeGen/X86/widen_conv-4.ll @@ -19,7 +19,7 @@ ; X86-SSE2-NEXT: movups %xmm0, (%eax) ; X86-SSE2-NEXT: movss %xmm2, 16(%eax) ; X86-SSE2-NEXT: movaps %xmm2, %xmm0 -; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm2[1],xmm0[1] +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1] ; X86-SSE2-NEXT: movss %xmm0, 24(%eax) ; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3] ; X86-SSE2-NEXT: movss %xmm2, 20(%eax) @@ -100,7 +100,7 @@ ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X86-SSE2-NEXT: movss %xmm0, (%eax) ; X86-SSE2-NEXT: movaps %xmm0, %xmm1 -; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; X86-SSE2-NEXT: movss %xmm1, 8(%eax) ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; X86-SSE2-NEXT: movss %xmm0, 4(%eax)