Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -837,19 +837,6 @@
             Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
 }
 
-// TODO: This is largely to trick fastisel into ignoring the pattern.
-def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
-                          (X86Unpckh node:$src1, node:$src2), [{
-  return N->getOperand(0) == N->getOperand(1);
-}]>;
-
-let Predicates = [UseSSE2] in {
-  // TODO: This is a hack pattern to allow lowering to emit unpckh instead of
-  // movhlps for sse2 without changing a bunch of tests.
-  def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)),
-            (MOVHLPSrr VR128:$src, VR128:$src)>;
-}
-
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Conversion Instructions
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/X86/buildvec-insertvec.ll
===================================================================
--- test/CodeGen/X86/buildvec-insertvec.ll
+++ test/CodeGen/X86/buildvec-insertvec.ll
@@ -38,7 +38,7 @@
 ; SSE2-LABEL: test_negative_zero_1:
 ; SSE2: # %bb.0: # %entry
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: xorps %xmm2, %xmm2
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
Index: test/CodeGen/X86/cast-vsel.ll
===================================================================
--- test/CodeGen/X86/cast-vsel.ll
+++ test/CodeGen/X86/cast-vsel.ll
@@ -139,7 +139,7 @@
 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm5
 ; SSE41-NEXT: cvtps2pd %xmm5, %xmm0
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE41-NEXT: cvtps2pd %xmm5, %xmm1
 ; SSE41-NEXT: retq
 ;
Index: test/CodeGen/X86/combine-fcopysign.ll
===================================================================
--- test/CodeGen/X86/combine-fcopysign.ll
+++ test/CodeGen/X86/combine-fcopysign.ll
@@ -197,7 +197,7 @@
 ; SSE-NEXT: cvtss2sd %xmm2, %xmm4
 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
 ; SSE-NEXT: movaps %xmm2, %xmm6
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1],xmm2[1]
 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3]
 ; SSE-NEXT: movaps {{.*#+}} xmm7
 ; SSE-NEXT: movaps %xmm0, %xmm2
@@ -205,7 +205,7 @@
 ; SSE-NEXT: movaps {{.*#+}} xmm8 = [-0.000000e+00,-0.000000e+00]
 ; SSE-NEXT: andps %xmm8, %xmm4
 ; SSE-NEXT: orps %xmm4, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: andps %xmm7, %xmm0
 ; SSE-NEXT: xorps %xmm4, %xmm4
 ; SSE-NEXT: cvtss2sd %xmm5, %xmm4
@@ -213,7 +213,7 @@
 ; SSE-NEXT: orps %xmm0, %xmm4
 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
 ; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE-NEXT: andps %xmm7, %xmm0
 ; SSE-NEXT: cvtss2sd %xmm3, %xmm3
 ; SSE-NEXT: andps %xmm8, %xmm3
@@ -254,13 +254,13 @@
 ; SSE-NEXT: orps %xmm6, %xmm0
 ; SSE-NEXT: movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
 ; SSE-NEXT: andps %xmm5, %xmm6
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: cvtsd2ss %xmm1, %xmm1
 ; SSE-NEXT: andps %xmm4, %xmm1
 ; SSE-NEXT: orps %xmm6, %xmm1
 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE-NEXT: movaps %xmm3, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
 ; SSE-NEXT: andps %xmm5, %xmm1
 ; SSE-NEXT: xorps %xmm6, %xmm6
 ; SSE-NEXT: cvtsd2ss %xmm2, %xmm6
@@ -269,7 +269,7 @@
 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE-NEXT: andps %xmm5, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: xorps %xmm1, %xmm1
 ; SSE-NEXT: cvtsd2ss %xmm2, %xmm1
 ; SSE-NEXT: andps %xmm4, %xmm1
Index: test/CodeGen/X86/complex-fastmath.ll
===================================================================
--- test/CodeGen/X86/complex-fastmath.ll
+++ test/CodeGen/X86/complex-fastmath.ll
@@ -57,9 +57,9 @@
 define <2 x double> @complex_square_f64(<2 x double>) #0 {
 ; SSE-LABEL: complex_square_f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT: movapd %xmm0, %xmm2
 ; SSE-NEXT: addsd %xmm0, %xmm2
 ; SSE-NEXT: mulsd %xmm1, %xmm2
 ; SSE-NEXT: mulsd %xmm0, %xmm0
@@ -160,11 +160,11 @@
 define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 {
 ; SSE-LABEL: complex_mul_f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
-; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
-; SSE-NEXT: movaps %xmm3, %xmm4
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
+; SSE-NEXT: movapd %xmm1, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
+; SSE-NEXT: movapd %xmm3, %xmm4
 ; SSE-NEXT: mulsd %xmm0, %xmm4
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: mulsd %xmm2, %xmm1
Index: test/CodeGen/X86/fma.ll
===================================================================
--- test/CodeGen/X86/fma.ll
+++ test/CodeGen/X86/fma.ll
@@ -344,15 +344,15 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x30]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
@@ -613,15 +613,15 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x10]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
@@ -681,15 +681,15 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x50]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
@@ -1121,14 +1121,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x60]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
@@ -1185,14 +1185,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
@@ -1249,14 +1249,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
@@ -1313,14 +1313,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x70]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
@@ -1447,15 +1447,15 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x30]
 ; FMACALL64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x04,0x24]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x20]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
@@ -1603,15 +1603,15 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
@@ -1633,15 +1633,15 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x40]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
@@ -1893,14 +1893,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x30]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xa0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
@@ -1921,14 +1921,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
@@ -1949,14 +1949,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
@@ -1977,14 +1977,14 @@
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x60]
-; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
+; FMACALL64-NEXT: unpckhpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x15,0xc0]
 ; FMACALL64-NEXT: ## xmm0 = xmm0[1,1]
 ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
 ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x70]
-; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
+; FMACALL64-NEXT: unpckhpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x15,0xc9]
 ; FMACALL64-NEXT: ## xmm1 = xmm1[1,1]
 ; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
-; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
+; FMACALL64-NEXT: unpckhpd %xmm2, %xmm2 ## encoding: [0x66,0x0f,0x15,0xd2]
 ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1]
 ; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
Index: test/CodeGen/X86/fp128-extract.ll
===================================================================
--- test/CodeGen/X86/fp128-extract.ll
+++ test/CodeGen/X86/fp128-extract.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT: callq __extenddftf2
 ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT: callq __extenddftf2
 ; CHECK-NEXT: movaps %xmm0, %xmm1
 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
Index: test/CodeGen/X86/ftrunc.ll
===================================================================
--- test/CodeGen/X86/ftrunc.ll
+++ test/CodeGen/X86/ftrunc.ll
@@ -67,7 +67,7 @@
 ; SSE2-NEXT: cvttss2si %xmm1, %rax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE2-NEXT: cvttss2si %xmm2, %rax
 ; SSE2-NEXT: movd %eax, %xmm2
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -105,10 +105,10 @@
 define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
 ; SSE2-LABEL: trunc_unsigned_v2f64:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: movapd %xmm1, %xmm3
 ; SSE2-NEXT: subsd %xmm2, %xmm3
 ; SSE2-NEXT: cvttsd2si %xmm3, %rax
 ; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
@@ -116,7 +116,7 @@
 ; SSE2-NEXT: cvttsd2si %xmm1, %rdx
 ; SSE2-NEXT: ucomisd %xmm2, %xmm1
 ; SSE2-NEXT: cmovaeq %rax, %rdx
-; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: movapd %xmm0, %xmm1
 ; SSE2-NEXT: subsd %xmm2, %xmm1
 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
 ; SSE2-NEXT: xorq %rcx, %rax
@@ -155,10 +155,10 @@
 define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
 ; SSE2-LABEL: trunc_unsigned_v4f64:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE2-NEXT: movapd %xmm1, %xmm3
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE2-NEXT: movaps %xmm3, %xmm4
+; SSE2-NEXT: movapd %xmm3, %xmm4
 ; SSE2-NEXT: subsd %xmm2, %xmm4
 ; SSE2-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE2-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000
@@ -166,23 +166,23 @@
 ; SSE2-NEXT: cvttsd2si %xmm3, %rax
 ; SSE2-NEXT: ucomisd %xmm2, %xmm3
 ; SSE2-NEXT: cmovaeq %rcx, %rax
-; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: movapd %xmm1, %xmm3
 ; SSE2-NEXT: subsd %xmm2, %xmm3
 ; SSE2-NEXT: cvttsd2si %xmm3, %rsi
 ; SSE2-NEXT: xorq %rdx, %rsi
 ; SSE2-NEXT: cvttsd2si %xmm1, %rcx
 ; SSE2-NEXT: ucomisd %xmm2, %xmm1
 ; SSE2-NEXT: cmovaeq %rsi, %rcx
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm3
 ; SSE2-NEXT: subsd %xmm2, %xmm3
 ; SSE2-NEXT: cvttsd2si %xmm3, %rsi
 ; SSE2-NEXT: xorq %rdx, %rsi
 ; SSE2-NEXT: cvttsd2si %xmm1, %rdi
 ; SSE2-NEXT: ucomisd %xmm2, %xmm1
 ; SSE2-NEXT: cmovaeq %rsi, %rdi
-; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: movapd %xmm0, %xmm1
 ; SSE2-NEXT: subsd %xmm2, %xmm1
 ; SSE2-NEXT: cvttsd2si %xmm1, %rsi
 ; SSE2-NEXT: xorq %rdx, %rsi
@@ -299,7 +299,7 @@
 ; SSE2-LABEL: trunc_signed_v2f64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE2-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE2-NEXT: xorps %xmm0, %xmm0
 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
@@ -325,10 +325,10 @@
 ; SSE2-LABEL: trunc_signed_v4f64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE2-NEXT: cvttsd2si %xmm1, %rcx
 ; SSE2-NEXT: cvttsd2si %xmm0, %rdx
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE2-NEXT: cvttsd2si %xmm0, %rsi
 ; SSE2-NEXT: xorps %xmm0, %xmm0
 ; SSE2-NEXT: cvtsi2sdq %rdx, %xmm0
Index: test/CodeGen/X86/haddsub-2.ll
===================================================================
--- test/CodeGen/X86/haddsub-2.ll
+++ test/CodeGen/X86/haddsub-2.ll
@@ -902,7 +902,7 @@
 ; SSE-LABEL: not_a_hsub_2:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
 ; SSE-NEXT: subss %xmm3, %xmm2
@@ -912,7 +912,7 @@
 ; SSE-NEXT: movaps %xmm1, %xmm2
 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3]
 ; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
 ; SSE-NEXT: subss %xmm3, %xmm2
 ; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
 ; SSE-NEXT: subss %xmm3, %xmm1
@@ -958,11 +958,11 @@
 define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) {
 ; SSE-LABEL: not_a_hsub_3:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm1, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE-NEXT: movapd %xmm1, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE-NEXT: subsd %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE-NEXT: subsd %xmm0, %xmm2
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; SSE-NEXT: movapd %xmm2, %xmm0
Index: test/CodeGen/X86/haddsub-3.ll
===================================================================
--- test/CodeGen/X86/haddsub-3.ll
+++ test/CodeGen/X86/haddsub-3.ll
@@ -10,7 +10,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
 ; SSE2-NEXT: addps %xmm0, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
@@ -19,7 +19,7 @@
 ; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSSE3-NEXT: addps %xmm0, %xmm1
 ; SSSE3-NEXT: movaps %xmm1, %xmm0
-; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSSE3-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSSE3-NEXT: addss %xmm1, %xmm0
 ; SSSE3-NEXT: retq
 ;
Index: test/CodeGen/X86/haddsub-undef.ll
===================================================================
--- test/CodeGen/X86/haddsub-undef.ll
+++ test/CodeGen/X86/haddsub-undef.ll
@@ -102,8 +102,8 @@
 define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) {
 ; SSE-LABEL: test5_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE-NEXT: addsd %xmm0, %xmm1
 ; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
@@ -168,7 +168,7 @@
 ; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSE-NEXT: addss %xmm0, %xmm1
 ; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT: addss %xmm2, %xmm0
 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
Index: test/CodeGen/X86/half.ll
===================================================================
--- test/CodeGen/X86/half.ll
+++ test/CodeGen/X86/half.ll
@@ -575,7 +575,7 @@
 ; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee
 ; BWON-NOF16C-NEXT: movl %eax, %r14d
 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; BWON-NOF16C-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee
 ; BWON-NOF16C-NEXT: movl %eax, %r15d
 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
@@ -608,7 +608,7 @@
 ; BWOFF-NEXT: callq __gnu_f2h_ieee
 ; BWOFF-NEXT: movw %ax, %r14w
 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; BWOFF-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; BWOFF-NEXT: callq __gnu_f2h_ieee
 ; BWOFF-NEXT: movw %ax, %r15w
 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
@@ -662,7 +662,7 @@
 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
 ; CHECK-I686-NEXT: movw %ax, %si
 ; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-I686-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-I686-NEXT: movss %xmm0, (%esp)
 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
 ; CHECK-I686-NEXT: movw %ax, %di
@@ -700,11 +700,11 @@
 ; BWON-NOF16C-NEXT: movq %rdi, %rbx
 ; BWON-NOF16C-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
 ; BWON-NOF16C-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; BWON-NOF16C-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; BWON-NOF16C-NEXT: callq __truncdfhf2
 ; BWON-NOF16C-NEXT: movl %eax, %r14d
 ; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; BWON-NOF16C-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; BWON-NOF16C-NEXT: callq __truncdfhf2
 ; BWON-NOF16C-NEXT: movl %eax, %r15d
 ; BWON-NOF16C-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
@@ -733,11 +733,11 @@
 ; BWOFF-NEXT: movq %rdi, %rbx
 ; BWOFF-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
 ; BWOFF-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; BWOFF-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; BWOFF-NEXT: callq __truncdfhf2
 ; BWOFF-NEXT: movw %ax, %r14w
 ; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; BWOFF-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; BWOFF-NEXT: callq __truncdfhf2
 ; BWOFF-NEXT: movw %ax, %r15w
 ; BWOFF-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
Index: test/CodeGen/X86/nontemporal-2.ll
===================================================================
--- test/CodeGen/X86/nontemporal-2.ll
+++ test/CodeGen/X86/nontemporal-2.ll
@@ -569,7 +569,7 @@
 ;
 ; SSE4A-LABEL: test_extract_f64:
 ; SSE4A: # %bb.0:
-; SSE4A-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE4A-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
 ; SSE4A-NEXT: retq
 ;
Index: test/CodeGen/X86/pr11334.ll
===================================================================
--- test/CodeGen/X86/pr11334.ll
+++ test/CodeGen/X86/pr11334.ll
@@ -21,11 +21,11 @@
 ; SSE-LABEL: v3f2d_ext_vec:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvtps2pd %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvtps2pd %xmm0, %xmm0
 ; SSE-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: movaps %xmm2, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE-NEXT: fldl -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -43,7 +43,7 @@
 ; SSE-LABEL: v4f2d_ext_vec:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvtps2pd %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvtps2pd %xmm0, %xmm1
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -62,9 +62,9 @@
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvtps2pd %xmm0, %xmm5
 ; SSE-NEXT: cvtps2pd %xmm1, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvtps2pd %xmm0, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: cvtps2pd %xmm1, %xmm3
 ; SSE-NEXT: movaps %xmm5, %xmm0
 ; SSE-NEXT: movaps %xmm4, %xmm1
Index: test/CodeGen/X86/sse-schedule.ll
===================================================================
--- test/CodeGen/X86/sse-schedule.ll
+++ test/CodeGen/X86/sse-schedule.ll
@@ -2712,7 +2712,7 @@
 ; GENERIC: # %bb.0:
 ; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
 ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -2720,7 +2720,7 @@
 ; ATOM: # %bb.0:
 ; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; ATOM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; ATOM-NEXT: retq # sched: [79:39.50]
 ;
@@ -2728,7 +2728,7 @@
 ; SLM: # %bb.0:
 ; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SLM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; SLM-NEXT: retq # sched: [4:1.00]
 ;
@@ -2736,7 +2736,7 @@
 ; SANDY-SSE: # %bb.0:
 ; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
 ; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
 ;
@@ -2751,7 +2751,7 @@
 ; HASWELL-SSE: # %bb.0:
 ; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
 ; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
 ;
@@ -2766,7 +2766,7 @@
 ; BROADWELL-SSE: # %bb.0:
 ; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
 ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
 ;
@@ -2781,7 +2781,7 @@
 ; SKYLAKE-SSE: # %bb.0:
 ; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
 ; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
 ;
@@ -2796,7 +2796,7 @@
 ; SKX-SSE: # %bb.0:
 ; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
 ; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
+; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
 ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
 ;
@@ -2811,7 +2811,7 @@
 ; BTVER2-SSE: # %bb.0:
 ; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
 ; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
 ; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00]
 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
 ;
@@ -2826,7 +2826,7 @@
 ; ZNVER1-SSE: # %bb.0:
 ; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
 ; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
 ; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50]
 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
 ;
Index: test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
===================================================================
--- test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -5406,7 +5406,7 @@
 ; X86-SSE-LABEL: test_mm_storeh_sd:
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
+; X86-SSE-NEXT: unpckhpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x15,0xc0]
 ; X86-SSE-NEXT: # xmm0 = xmm0[1,1]
 ; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
 ; X86-SSE-NEXT: retl # encoding: [0xc3]
@@ -5429,7 +5429,7 @@
 ;
 ; X64-SSE-LABEL: test_mm_storeh_sd:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
+; X64-SSE-NEXT: unpckhpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x15,0xc0]
 ; X64-SSE-NEXT: # xmm0 = xmm0[1,1]
 ; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
Index: test/CodeGen/X86/sse3-avx-addsub-2.ll
===================================================================
--- test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -268,8 +268,8 @@
 define <4 x float> @test11(<4 x float> %A, <4 x float> %B) {
 ; SSE-LABEL: test11:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: subss %xmm1, %xmm0
 ; SSE-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
 ; SSE-NEXT: retq
@@ -356,8 +356,8 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: movaps %xmm0, %xmm2
 ; SSE-NEXT: subss %xmm1, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: subss %xmm1, %xmm0
 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE-NEXT: movaps %xmm2, %xmm0
@@ -424,9 +424,9 @@
 ; SSE-NEXT: movaps %xmm0, %xmm2
 ; SSE-NEXT: subss %xmm3, %xmm2
 ; SSE-NEXT: movaps %xmm0, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
 ; SSE-NEXT: movaps %xmm1, %xmm5
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
 ; SSE-NEXT: subss %xmm5, %xmm4
 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
 ; SSE-NEXT: addss %xmm3, %xmm5
Index: test/CodeGen/X86/sse_partial_update.ll
===================================================================
--- test/CodeGen/X86/sse_partial_update.ll
+++ test/CodeGen/X86/sse_partial_update.ll
@@ -78,7 +78,7 @@
 ; CHECK: ## %bb.0: ## %entry
 ; CHECK-NEXT: sqrtsd %xmm0, %xmm1
 ; CHECK-NEXT: cvtsd2ss %xmm1, %xmm2
-; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
 ; CHECK-NEXT: movaps %xmm2, %xmm0
Index: test/CodeGen/X86/var-permute-128.ll
===================================================================
--- test/CodeGen/X86/var-permute-128.ll
+++ test/CodeGen/X86/var-permute-128.ll
@@ -405,7 +405,7 @@
 ; SSE41-NEXT: pxor %xmm0, %xmm0
 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
 ; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm2[0,0]
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
 ; SSE41-NEXT: movapd %xmm2, %xmm0
 ; SSE41-NEXT: retq
Index: test/CodeGen/X86/vec_extract.ll
===================================================================
--- test/CodeGen/X86/vec_extract.ll
+++ test/CodeGen/X86/vec_extract.ll
@@ -33,7 +33,7 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movaps (%eax), %xmm0
 ; X32-NEXT: addps %xmm0, %xmm0
-; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X32-NEXT: movss %xmm0, (%esp)
 ; X32-NEXT: flds (%esp)
 ; X32-NEXT: popl %eax
@@ -43,7 +43,7 @@
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movaps (%rdi), %xmm0
 ; X64-NEXT: addps %xmm0, %xmm0
-; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X64-NEXT: retq
 entry:
   %tmp = load <4 x float>, <4 x float>* %F
@@ -78,7 +78,7 @@
 ; X32: # %bb.0: # %entry
 ; X32-NEXT: subl $12, %esp
 ; X32-NEXT: calll foo
-; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X32-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
 ; X32-NEXT: movsd %xmm0, (%esp)
 ; X32-NEXT: fldl (%esp)
@@ -90,7 +90,7 @@
 ; X64-NEXT: pushq %rax
 ; X64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
 ; X64-NEXT: callq foo
-; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X64-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
 ; X64-NEXT: popq %rax
 ; X64-NEXT: retq
Index: test/CodeGen/X86/vec_fp_to_int.ll
===================================================================
--- test/CodeGen/X86/vec_fp_to_int.ll
+++ test/CodeGen/X86/vec_fp_to_int.ll
@@ -21,7 +21,7 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -129,13 +129,13 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
 ; SSE-NEXT: movq %rax, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
 ; SSE-NEXT: movq %rax, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
@@ -264,8 +264,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -348,8 +348,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -429,8 +429,8 @@
 ; SSE-NEXT: ucomisd %xmm1, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm1, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -508,8 +508,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -581,8 +581,8 @@
 ; SSE-NEXT: ucomisd %xmm3, %xmm2
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
-; SSE-NEXT: movaps %xmm2, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: movapd %xmm2, %xmm4
 ; SSE-NEXT: subsd %xmm3, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
@@ -599,8 +599,8 @@
 ; SSE-NEXT: ucomisd %xmm3, %xmm1
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; SSE-NEXT: movaps %xmm1, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: movapd %xmm1, %xmm4
 ; SSE-NEXT: subsd %xmm3, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
@@ -756,8 +756,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm1
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; SSE-NEXT: movaps %xmm1, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: movapd %xmm1, %xmm4
 ; SSE-NEXT: subsd %xmm2, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
@@ -774,8 +774,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm4
 ; SSE-NEXT: subsd %xmm2, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
@@ -1018,7 +1018,7 @@
 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
 ; SSE-NEXT: cvttss2si %xmm1, %rax
 ; SSE-NEXT: movq %rax, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttss2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm1
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
@@ -1126,7 +1126,7 @@
 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
 ; SSE-NEXT: cvttss2si %xmm1, %rax
 ; SSE-NEXT: movq %rax, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttss2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm1
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
@@ -1312,7 +1312,7 @@
 ; SSE-NEXT: cvttss2si %xmm1, %rax
 ; SSE-NEXT: movd %eax, %xmm1
 ; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE-NEXT: cvttss2si %xmm2, %rax
 ; SSE-NEXT: movd %eax, %xmm2
 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -1550,7 +1550,7 @@
 ; SSE-NEXT: cvttss2si %xmm0, %rax
 ; SSE-NEXT: movd %eax, %xmm0
 ; SSE-NEXT: movaps %xmm2, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm2[1],xmm3[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
 ; SSE-NEXT: cvttss2si %xmm3, %rax
 ; SSE-NEXT: movd %eax, %xmm3
 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
@@ -1566,7 +1566,7 @@
 ; SSE-NEXT: cvttss2si %xmm2, %rax
 ; SSE-NEXT: movd %eax, %xmm2
 ; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
 ; SSE-NEXT: cvttss2si %xmm3, %rax
 ; SSE-NEXT: movd %eax, %xmm3
 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
@@ -1697,7 +1697,7 @@
 ; SSE-NEXT: ucomiss %xmm1, %xmm3
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
 ; SSE-NEXT: cvttss2si %xmm4, %rcx
@@ -1875,7 +1875,7 @@
 ; SSE-NEXT: ucomiss %xmm1, %xmm3
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
 ; SSE-NEXT: cvttss2si %xmm4, %rcx
Index: test/CodeGen/X86/vector-reduce-fadd-fast.ll
===================================================================
--- test/CodeGen/X86/vector-reduce-fadd-fast.ll
+++ test/CodeGen/X86/vector-reduce-fadd-fast.ll
@@ -41,7 +41,7 @@
 ; SSE2-LABEL: test_v4f32:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE2-NEXT: addps %xmm1, %xmm2
 ; SSE2-NEXT: movaps %xmm2, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@@ -51,7 +51,7 @@
 ; SSE41-LABEL: test_v4f32:
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: movaps %xmm1, %xmm0
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE41-NEXT: addps %xmm1, %xmm0
 ; SSE41-NEXT: haddps %xmm0, %xmm0
 ; SSE41-NEXT: retq
@@ -78,7 +78,7 @@
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: addps %xmm2, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE2-NEXT: addps %xmm1, %xmm2
 ; SSE2-NEXT: movaps %xmm2, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@@ -89,7 +89,7 @@
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: addps %xmm2, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE41-NEXT: addps %xmm1, %xmm0
 ; SSE41-NEXT: haddps %xmm0, %xmm0
 ; SSE41-NEXT: retq
@@ -126,7 +126,7 @@
 ; SSE2-NEXT: addps %xmm3, %xmm1
 ; SSE2-NEXT: addps %xmm2, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE2-NEXT: addps %xmm1, %xmm2
 ; SSE2-NEXT: movaps %xmm2, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@@ -139,7 +139,7 @@
 ; SSE41-NEXT: addps %xmm3, %xmm1
 ; SSE41-NEXT: addps %xmm2, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE41-NEXT: addps %xmm1, %xmm0
 ; SSE41-NEXT: haddps %xmm0, %xmm0
 ; SSE41-NEXT: retq
@@ -208,7 +208,7 @@
 ; SSE2-LABEL: test_v4f32_zero:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addps %xmm0, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -218,7 +218,7 @@
 ; SSE41-LABEL: test_v4f32_zero:
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE41-NEXT: addps %xmm0, %xmm1
 ; SSE41-NEXT: haddps %xmm1, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -246,7 +246,7 @@
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: addps %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addps %xmm0, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -257,7 +257,7 @@
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: addps %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE41-NEXT: addps %xmm0, %xmm1
 ; SSE41-NEXT: haddps %xmm1, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -295,7 +295,7 @@
 ; SSE2-NEXT: addps %xmm2, %xmm0
 ; SSE2-NEXT: addps %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addps %xmm0, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -308,7 +308,7 @@
 ; SSE41-NEXT: addps %xmm2, %xmm0
 ; SSE41-NEXT: addps %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE41-NEXT: addps %xmm0, %xmm1
 ; SSE41-NEXT: haddps %xmm1, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -378,7 +378,7 @@
 ; SSE2-LABEL: test_v4f32_undef:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addps %xmm0, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -388,7 +388,7 @@
 ; SSE41-LABEL: test_v4f32_undef:
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE41-NEXT: addps %xmm0, %xmm1
 ; SSE41-NEXT: haddps %xmm1, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -416,7 +416,7 @@
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: addps %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addps %xmm0, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -427,7 +427,7 @@
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: addps %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE41-NEXT: addps %xmm0, %xmm1
 ; SSE41-NEXT: haddps %xmm1, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -465,7 +465,7 @@
 ; SSE2-NEXT: addps %xmm2, %xmm0
 ; SSE2-NEXT: addps %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addps %xmm0, %xmm1
 ; SSE2-NEXT: movaps %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -478,7 +478,7 @@
 ; SSE41-NEXT: addps %xmm2, %xmm0
 ; SSE41-NEXT: addps %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE41-NEXT: addps %xmm0, %xmm1
 ; SSE41-NEXT: haddps %xmm1, %xmm1
 ; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -520,8 +520,8 @@
 define double @test_v2f64(double %a0, <2 x double> %a1) {
 ; SSE2-LABEL: test_v2f64:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
@@ -549,7 +549,7 @@
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: addpd %xmm2, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
@@ -588,7 +588,7 @@
 ; SSE2-NEXT: addpd %xmm3, %xmm1
 ; SSE2-NEXT: addpd %xmm2, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
@@ -637,7 +637,7 @@
 ; SSE2-NEXT: addpd %xmm2, %xmm4
 ; SSE2-NEXT: addpd %xmm1, %xmm4
 ; SSE2-NEXT: movapd %xmm4, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1]
 ; SSE2-NEXT: addpd %xmm4, %xmm0
 ; SSE2-NEXT: retq
 ;
@@ -689,8 +689,8 @@
 define double @test_v2f64_zero(<2 x double> %a0) {
 ; SSE2-LABEL: test_v2f64_zero:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -718,7 +718,7 @@
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -757,7 +757,7 @@
 ; SSE2-NEXT: addpd %xmm2, %xmm0
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -806,7 +806,7 @@
 ; SSE2-NEXT: addpd %xmm3, %xmm1
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
@@ -858,8 +858,8 @@
 define double @test_v2f64_undef(<2 x double> %a0) {
 ; SSE2-LABEL: test_v2f64_undef:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -887,7 +887,7 @@
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -926,7 +926,7 @@
 ; SSE2-NEXT: addpd %xmm2, %xmm0
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -975,7 +975,7 @@
 ; SSE2-NEXT: addpd %xmm3, %xmm1
 ; SSE2-NEXT: addpd %xmm0, %xmm1
 ; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
Index: test/CodeGen/X86/vector-reduce-fadd.ll
===================================================================
--- test/CodeGen/X86/vector-reduce-fadd.ll
+++ test/CodeGen/X86/vector-reduce-fadd.ll
@@ -50,7 +50,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
 ; SSE2-NEXT: addss %xmm2, %xmm0
 ; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE2-NEXT: addss %xmm2, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
@@ -62,7 +62,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
 ; SSE41-NEXT: addss %xmm2, %xmm0
 ; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE41-NEXT: addss %xmm2, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
@@ -101,7 +101,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
 ; SSE2-NEXT: addss %xmm3, %xmm0
 ; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
 ; SSE2-NEXT: addss %xmm3, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
@@ -110,7 +110,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE2-NEXT: addss %xmm2, %xmm0
@@ -122,7 +122,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
 ; SSE41-NEXT: addss %xmm3, %xmm0
 ; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
 ; SSE41-NEXT: addss %xmm3, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
@@ -130,7 +130,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE41-NEXT: addss %xmm2, %xmm0
@@ -187,7 +187,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
 ; SSE2-NEXT: addss %xmm5, %xmm0
 ; SSE2-NEXT: movaps %xmm1, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
 ; SSE2-NEXT: addss %xmm5, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
@@ -196,7 +196,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE2-NEXT: addss %xmm2, %xmm0
@@ -205,7 +205,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE2-NEXT: addss %xmm3, %xmm0
@@ -214,7 +214,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm4, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
 ; SSE2-NEXT: addss %xmm4, %xmm0
@@ -226,7 +226,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
 ; SSE41-NEXT: addss %xmm5, %xmm0
 ; SSE41-NEXT: movaps %xmm1, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
 ; SSE41-NEXT: addss %xmm5, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
@@ -234,7 +234,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE41-NEXT: addss %xmm2, %xmm0
@@ -242,7 +242,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE41-NEXT: addss %xmm3, %xmm0
@@ -250,7 +250,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: movaps %xmm4, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
 ; SSE41-NEXT: addss %xmm1, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
 ; SSE41-NEXT: addss %xmm4, %xmm0
@@ -379,7 +379,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm2
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE2-NEXT: addss %xmm2, %xmm1
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
@@ -392,7 +392,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
 ; SSE41-NEXT: addss %xmm1, %xmm2
 ; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE41-NEXT: addss %xmm2, %xmm1
 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
@@ -434,7 +434,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
 ; SSE2-NEXT: addss %xmm2, %xmm3
 ; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE2-NEXT: addss %xmm3, %xmm2
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE2-NEXT: addss %xmm2, %xmm0
@@ -443,7 +443,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
 ; SSE2-NEXT: addss %xmm2, %xmm0
 ; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE2-NEXT: addss %xmm2, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
@@ -456,7 +456,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
 ; SSE41-NEXT: addss %xmm2, %xmm3
 ; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE41-NEXT: addss %xmm3, %xmm2
 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE41-NEXT: addss %xmm2, %xmm0
@@ -464,7 +464,7 @@
 ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
 ; SSE41-NEXT: addss %xmm2, %xmm0
 ; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE41-NEXT: addss %xmm2, %xmm0
 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT: addss %xmm1, %xmm0
@@ -524,7 +524,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3]
 ; SSE2-NEXT: addss %xmm4, %xmm5
 ; SSE2-NEXT: movaps %xmm0, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
 ; SSE2-NEXT: addss %xmm5, %xmm4
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE2-NEXT: addss %xmm4, %xmm0
@@ -533,7 +533,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
 ; SSE2-NEXT: addss %xmm4, %xmm0
 ; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
 ; SSE2-NEXT: addss %xmm4, %xmm0
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
@@ -542,7 +542,7 @@
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
 ; SSE2-NEXT: addss %xmm1, %xmm0
 ; SSE2-NEXT: movaps %xmm2, %xmm1
-;
SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -551,7 +551,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -564,7 +564,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm5 ; SSE41-NEXT: movaps %xmm0, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE41-NEXT: addss %xmm5, %xmm4 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm4, %xmm0 @@ -572,7 +572,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -580,7 +580,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -588,7 +588,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -709,7 +709,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: addss %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -720,7 +720,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: addss %xmm1, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -756,7 +756,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE2-NEXT: addss %xmm2, %xmm3 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -765,7 +765,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -776,7 +776,7 
@@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE41-NEXT: addss %xmm2, %xmm3 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -784,7 +784,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -838,7 +838,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE2-NEXT: movaps %xmm0, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE2-NEXT: addss %xmm4, %xmm5 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm5, %xmm0 @@ -847,7 +847,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -856,7 +856,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -865,7 +865,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -876,7 +876,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE41-NEXT: movaps %xmm0, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE41-NEXT: addss %xmm4, %xmm5 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm5, %xmm0 @@ -884,7 +884,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -892,7 +892,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -900,7 +900,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; 
SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -986,7 +986,7 @@ ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -1011,10 +1011,10 @@ ; SSE-LABEL: test_v4f64: ; SSE: # %bb.0: ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: addsd %xmm2, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: retq ; @@ -1049,16 +1049,16 @@ ; SSE-LABEL: test_v8f64: ; SSE: # %bb.0: ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: addsd %xmm2, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: addsd %xmm3, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: addsd %xmm3, %xmm0 ; SSE-NEXT: addsd %xmm4, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: addsd %xmm4, %xmm0 ; SSE-NEXT: retq ; @@ -1109,28 +1109,28 @@ ; SSE: # %bb.0: ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: addsd %xmm2, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: addsd %xmm3, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: addsd %xmm3, %xmm0 ; SSE-NEXT: addsd %xmm4, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: addsd %xmm4, %xmm0 ; SSE-NEXT: addsd %xmm5, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1] ; SSE-NEXT: addsd %xmm5, %xmm0 ; SSE-NEXT: addsd %xmm6, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1] ; SSE-NEXT: addsd %xmm6, %xmm0 ; SSE-NEXT: addsd %xmm7, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1] ; SSE-NEXT: addsd %xmm7, %xmm0 ; SSE-NEXT: addsd %xmm8, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm8 = xmm8[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm8 = xmm8[1,1] ; SSE-NEXT: addsd %xmm8, %xmm0 ; SSE-NEXT: retq ; @@ -1214,7 +1214,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: xorpd %xmm1, %xmm1 ; SSE-NEXT: addsd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -1242,10 +1242,10 @@ ; SSE: # %bb.0: ; SSE-NEXT: xorpd %xmm2, %xmm2 ; SSE-NEXT: addsd %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -1283,16 +1283,16 @@ ; SSE: # %bb.0: ; SSE-NEXT: xorpd %xmm4, %xmm4 ; SSE-NEXT: 
addsd %xmm0, %xmm4 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd %xmm4, %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: addsd %xmm2, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: addsd %xmm3, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: addsd %xmm3, %xmm0 ; SSE-NEXT: retq ; @@ -1345,28 +1345,28 @@ ; SSE: # %bb.0: ; SSE-NEXT: xorpd %xmm8, %xmm8 ; SSE-NEXT: addsd %xmm0, %xmm8 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd %xmm8, %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: addsd %xmm2, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: addsd %xmm3, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: addsd %xmm3, %xmm0 ; SSE-NEXT: addsd %xmm4, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: addsd %xmm4, %xmm0 ; SSE-NEXT: addsd %xmm5, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1] ; SSE-NEXT: addsd %xmm5, %xmm0 ; SSE-NEXT: addsd %xmm6, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1] ; SSE-NEXT: addsd %xmm6, %xmm0 ; SSE-NEXT: addsd %xmm7, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1] ; SSE-NEXT: addsd %xmm7, %xmm0 ; SSE-NEXT: retq ; @@ -1450,7 +1450,7 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; @@ -1472,10 +1472,10 @@ define double @test_v4f64_undef(<4 x double> %a0) { ; SSE-LABEL: test_v4f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -1507,16 +1507,16 @@ define double @test_v8f64_undef(<8 x double> %a0) { ; SSE-LABEL: test_v8f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: addsd %xmm2, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: addsd %xmm3, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: addsd %xmm3, %xmm0 ; SSE-NEXT: retq ; @@ -1563,28 +1563,28 @@ define double @test_v16f64_undef(<16 x double> %a0) { ; SSE-LABEL: test_v16f64_undef: ; SSE: # 
%bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: addsd %xmm2, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: addsd %xmm2, %xmm0 ; SSE-NEXT: addsd %xmm3, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: addsd %xmm3, %xmm0 ; SSE-NEXT: addsd %xmm4, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: addsd %xmm4, %xmm0 ; SSE-NEXT: addsd %xmm5, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1] ; SSE-NEXT: addsd %xmm5, %xmm0 ; SSE-NEXT: addsd %xmm6, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1] ; SSE-NEXT: addsd %xmm6, %xmm0 ; SSE-NEXT: addsd %xmm7, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1] ; SSE-NEXT: addsd %xmm7, %xmm0 ; SSE-NEXT: retq ; Index: test/CodeGen/X86/vector-reduce-fmax-nnan.ll =================================================================== --- test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: maxps %xmm2, %xmm0 ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: maxps %xmm2, %xmm0 ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; 
SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: maxpd %xmm2, %xmm0 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: maxpd %xmm3, %xmm1 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: test/CodeGen/X86/vector-reduce-fmax.ll =================================================================== --- test/CodeGen/X86/vector-reduce-fmax.ll +++ test/CodeGen/X86/vector-reduce-fmax.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: maxps %xmm2, %xmm0 ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: maxps %xmm2, %xmm0 ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; 
SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: maxpd %xmm2, %xmm0 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: maxpd %xmm3, %xmm1 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: test/CodeGen/X86/vector-reduce-fmin-nnan.ll =================================================================== --- test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: minps %xmm2, %xmm0 ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: minps %xmm2, %xmm0 ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] 
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: minpd %xmm2, %xmm0 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: minpd %xmm3, %xmm1 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: test/CodeGen/X86/vector-reduce-fmin.ll =================================================================== --- test/CodeGen/X86/vector-reduce-fmin.ll +++ test/CodeGen/X86/vector-reduce-fmin.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: minps %xmm2, %xmm0 ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: minps %xmm2, %xmm0 ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: minps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ ; SSE-NEXT: minpd %xmm2, %xmm0 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, 
%xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ ; SSE-NEXT: minpd %xmm3, %xmm1 ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: test/CodeGen/X86/vector-reduce-fmul-fast.ll =================================================================== --- test/CodeGen/X86/vector-reduce-fmul-fast.ll +++ test/CodeGen/X86/vector-reduce-fmul-fast.ll @@ -43,7 +43,7 @@ ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -53,7 +53,7 @@ ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulps %xmm1, %xmm2 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3] ; SSE41-NEXT: mulps %xmm2, %xmm0 @@ -83,7 +83,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: mulps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -94,7 +94,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: mulps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulps %xmm1, %xmm2 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3] ; SSE41-NEXT: mulps %xmm2, %xmm0 @@ -134,7 +134,7 @@ ; SSE2-NEXT: mulps %xmm3, %xmm1 ; SSE2-NEXT: mulps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -147,7 +147,7 @@ ; SSE41-NEXT: mulps %xmm3, %xmm1 ; SSE41-NEXT: mulps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulps %xmm1, %xmm2 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3] ; SSE41-NEXT: mulps %xmm2, %xmm0 @@ -221,7 +221,7 @@ ; SSE2-LABEL: test_v4f32_zero: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -231,7 +231,7 @@ ; SSE41-LABEL: test_v4f32_zero: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -262,7 +262,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps 
%xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -273,7 +273,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -314,7 +314,7 @@ ; SSE2-NEXT: mulps %xmm2, %xmm0 ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -327,7 +327,7 @@ ; SSE41-NEXT: mulps %xmm2, %xmm0 ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -402,7 +402,7 @@ ; SSE2-LABEL: test_v4f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -412,7 +412,7 @@ ; SSE41-LABEL: test_v4f32_undef: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -443,7 +443,7 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -454,7 +454,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -495,7 +495,7 @@ ; SSE2-NEXT: mulps %xmm2, %xmm0 ; SSE2-NEXT: mulps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: mulps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -508,7 +508,7 @@ ; SSE41-NEXT: mulps %xmm2, %xmm0 ; SSE41-NEXT: mulps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: mulps %xmm0, %xmm1 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: mulps %xmm0, %xmm1 @@ -552,8 +552,8 @@ define double @test_v2f64(double %a0, <2 x double> %a1) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -577,7 +577,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: 
mulpd %xmm2, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -611,7 +611,7 @@ ; SSE-NEXT: mulpd %xmm3, %xmm1 ; SSE-NEXT: mulpd %xmm2, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -652,7 +652,7 @@ ; SSE-NEXT: mulpd %xmm2, %xmm4 ; SSE-NEXT: mulpd %xmm1, %xmm4 ; SSE-NEXT: movapd %xmm4, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1] ; SSE-NEXT: mulpd %xmm4, %xmm0 ; SSE-NEXT: retq ; @@ -692,8 +692,8 @@ define double @test_v2f64_zero(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_zero: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -718,7 +718,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -753,7 +753,7 @@ ; SSE-NEXT: mulpd %xmm2, %xmm0 ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -795,7 +795,7 @@ ; SSE-NEXT: mulpd %xmm3, %xmm1 ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -835,8 +835,8 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -861,7 +861,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -896,7 +896,7 @@ ; SSE-NEXT: mulpd %xmm2, %xmm0 ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -938,7 +938,7 @@ ; SSE-NEXT: mulpd %xmm3, %xmm1 ; SSE-NEXT: mulpd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: mulpd %xmm1, %xmm0 ; SSE-NEXT: retq ; Index: test/CodeGen/X86/vector-reduce-fmul.ll =================================================================== --- test/CodeGen/X86/vector-reduce-fmul.ll +++ test/CodeGen/X86/vector-reduce-fmul.ll @@ -50,7 +50,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; 
SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -62,7 +62,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -101,7 +101,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE2-NEXT: mulss %xmm3, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -110,7 +110,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -122,7 +122,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE41-NEXT: mulss %xmm3, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -130,7 +130,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -187,7 +187,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm5, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE2-NEXT: mulss %xmm5, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -196,7 +196,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -205,7 +205,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 @@ -214,7 +214,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm4, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE2-NEXT: mulss %xmm4, %xmm0 @@ -226,7 +226,7 @@ ; SSE41-NEXT: movshdup 
{{.*#+}} xmm5 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm5, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE41-NEXT: mulss %xmm5, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 @@ -234,7 +234,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -242,7 +242,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 @@ -250,7 +250,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm4, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE41-NEXT: mulss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE41-NEXT: mulss %xmm4, %xmm0 @@ -371,7 +371,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] ; SSE2-NEXT: mulss %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: mulss %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 @@ -382,7 +382,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss %xmm0, %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: mulss %xmm1, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 @@ -418,7 +418,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: mulss %xmm0, %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE2-NEXT: mulss %xmm2, %xmm3 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: mulss %xmm3, %xmm0 @@ -427,7 +427,7 @@ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: mulss %xmm1, %xmm0 @@ -438,7 +438,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: mulss %xmm0, %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE41-NEXT: mulss %xmm2, %xmm3 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: mulss %xmm3, %xmm0 @@ -446,7 +446,7 @@ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: mulss %xmm2, %xmm0 ; SSE41-NEXT: 
shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
@@ -500,7 +500,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
 ; SSE2-NEXT:    mulss %xmm0, %xmm4
 ; SSE2-NEXT:    movaps %xmm0, %xmm5
-; SSE2-NEXT:    movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
 ; SSE2-NEXT:    mulss %xmm4, %xmm5
 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm5, %xmm0
@@ -509,7 +509,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
 ; SSE2-NEXT:    mulss %xmm4, %xmm0
 ; SSE2-NEXT:    movaps %xmm1, %xmm4
-; SSE2-NEXT:    movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
 ; SSE2-NEXT:    mulss %xmm4, %xmm0
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
@@ -518,7 +518,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    movaps %xmm2, %xmm1
-; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm2, %xmm0
@@ -527,7 +527,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    movaps %xmm3, %xmm1
-; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm3, %xmm0
@@ -538,7 +538,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm0, %xmm4
 ; SSE41-NEXT:    movaps %xmm0, %xmm5
-; SSE41-NEXT:    movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
 ; SSE41-NEXT:    mulss %xmm4, %xmm5
 ; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm5, %xmm0
@@ -546,7 +546,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm4, %xmm0
 ; SSE41-NEXT:    movaps %xmm1, %xmm4
-; SSE41-NEXT:    movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
 ; SSE41-NEXT:    mulss %xmm4, %xmm0
 ; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
@@ -554,7 +554,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    movaps %xmm2, %xmm1
-; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm2, %xmm0
@@ -562,7 +562,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    movaps %xmm3, %xmm1
-; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm3, %xmm0
@@ -679,7 +679,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
 ; SSE2-NEXT:    mulss {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    movaps %xmm0, %xmm2
-; SSE2-NEXT:    movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE2-NEXT:    mulss %xmm1, %xmm2
 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm2, %xmm0
@@ -690,7 +690,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    mulss {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    movaps %xmm0, %xmm2
-; SSE41-NEXT:    movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE41-NEXT:    mulss %xmm1, %xmm2
 ; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm2, %xmm0
@@ -726,7 +726,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
 ; SSE2-NEXT:    mulss {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    movaps %xmm0, %xmm3
-; SSE2-NEXT:    movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
 ; SSE2-NEXT:    mulss %xmm2, %xmm3
 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm3, %xmm0
@@ -735,7 +735,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
 ; SSE2-NEXT:    mulss %xmm2, %xmm0
 ; SSE2-NEXT:    movaps %xmm1, %xmm2
-; SSE2-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE2-NEXT:    mulss %xmm2, %xmm0
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
@@ -746,7 +746,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    mulss {{.*}}(%rip), %xmm2
 ; SSE41-NEXT:    movaps %xmm0, %xmm3
-; SSE41-NEXT:    movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
 ; SSE41-NEXT:    mulss %xmm2, %xmm3
 ; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm3, %xmm0
@@ -754,7 +754,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm2, %xmm0
 ; SSE41-NEXT:    movaps %xmm1, %xmm2
-; SSE41-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
 ; SSE41-NEXT:    mulss %xmm2, %xmm0
 ; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
@@ -808,7 +808,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
 ; SSE2-NEXT:    mulss {{.*}}(%rip), %xmm4
 ; SSE2-NEXT:    movaps %xmm0, %xmm5
-; SSE2-NEXT:    movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
 ; SSE2-NEXT:    mulss %xmm4, %xmm5
 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm5, %xmm0
@@ -817,7 +817,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
 ; SSE2-NEXT:    mulss %xmm4, %xmm0
 ; SSE2-NEXT:    movaps %xmm1, %xmm4
-; SSE2-NEXT:    movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
 ; SSE2-NEXT:    mulss %xmm4, %xmm0
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
@@ -826,7 +826,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    movaps %xmm2, %xmm1
-; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm2, %xmm0
@@ -835,7 +835,7 @@
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    movaps %xmm3, %xmm1
-; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
 ; SSE2-NEXT:    mulss %xmm1, %xmm0
 ; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE2-NEXT:    mulss %xmm3, %xmm0
@@ -846,7 +846,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    mulss {{.*}}(%rip), %xmm4
 ; SSE41-NEXT:    movaps %xmm0, %xmm5
-; SSE41-NEXT:    movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
 ; SSE41-NEXT:    mulss %xmm4, %xmm5
 ; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm5, %xmm0
@@ -854,7 +854,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm4, %xmm0
 ; SSE41-NEXT:    movaps %xmm1, %xmm4
-; SSE41-NEXT:    movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
 ; SSE41-NEXT:    mulss %xmm4, %xmm0
 ; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
@@ -862,7 +862,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    movaps %xmm2, %xmm1
-; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm2, %xmm0
@@ -870,7 +870,7 @@
 ; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    movaps %xmm3, %xmm1
-; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
 ; SSE41-NEXT:    mulss %xmm1, %xmm0
 ; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE41-NEXT:    mulss %xmm3, %xmm0
@@ -956,7 +956,7 @@
 ; SSE-LABEL: test_v2f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -981,10 +981,10 @@
 ; SSE-LABEL: test_v4f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1019,16 +1019,16 @@
 ; SSE-LABEL: test_v8f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1079,28 +1079,28 @@
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm8
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
 ; SSE-NEXT:    mulsd %xmm5, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT:    mulsd %xmm5, %xmm0
 ; SSE-NEXT:    mulsd %xmm6, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT:    mulsd %xmm6, %xmm0
 ; SSE-NEXT:    mulsd %xmm7, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT:    mulsd %xmm7, %xmm0
 ; SSE-NEXT:    mulsd %xmm8, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm8 = xmm8[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm8 = xmm8[1,1]
 ; SSE-NEXT:    mulsd %xmm8, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1182,8 +1182,8 @@
 define double @test_v2f64_one(<2 x double> %a0) {
 ; SSE-LABEL: test_v2f64_one:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm1
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT:    movapd %xmm0, %xmm1
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 ; SSE-NEXT:    movapd %xmm1, %xmm0
 ; SSE-NEXT:    retq
@@ -1206,11 +1206,11 @@
 define double @test_v4f64_one(<4 x double> %a0) {
 ; SSE-LABEL: test_v4f64_one:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT:    movapd %xmm0, %xmm2
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
 ; SSE-NEXT:    mulsd %xmm0, %xmm2
 ; SSE-NEXT:    mulsd %xmm1, %xmm2
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm2
 ; SSE-NEXT:    movapd %xmm2, %xmm0
 ; SSE-NEXT:    retq
@@ -1243,17 +1243,17 @@
 define double @test_v8f64_one(<8 x double> %a0) {
 ; SSE-LABEL: test_v8f64_one:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm4
-; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE-NEXT:    movapd %xmm0, %xmm4
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
 ; SSE-NEXT:    mulsd %xmm0, %xmm4
 ; SSE-NEXT:    mulsd %xmm1, %xmm4
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm4
 ; SSE-NEXT:    mulsd %xmm2, %xmm4
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    mulsd %xmm2, %xmm4
 ; SSE-NEXT:    mulsd %xmm3, %xmm4
-; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    mulsd %xmm3, %xmm4
 ; SSE-NEXT:    movapd %xmm4, %xmm0
 ; SSE-NEXT:    retq
@@ -1301,29 +1301,29 @@
 define double @test_v16f64_one(<16 x double> %a0) {
 ; SSE-LABEL: test_v16f64_one:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm8
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    movapd %xmm0, %xmm8
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    mulsd %xmm8, %xmm0
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
 ; SSE-NEXT:    mulsd %xmm5, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT:    mulsd %xmm5, %xmm0
 ; SSE-NEXT:    mulsd %xmm6, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT:    mulsd %xmm6, %xmm0
 ; SSE-NEXT:    mulsd %xmm7, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT:    mulsd %xmm7, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1403,7 +1403,7 @@
 define double @test_v2f64_undef(<2 x double> %a0) {
 ; SSE-LABEL: test_v2f64_undef:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1425,10 +1425,10 @@
 define double @test_v4f64_undef(<4 x double> %a0) {
 ; SSE-LABEL: test_v4f64_undef:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1460,16 +1460,16 @@
 define double @test_v8f64_undef(<8 x double> %a0) {
 ; SSE-LABEL: test_v8f64_undef:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1516,28 +1516,28 @@
 define double @test_v16f64_undef(<16 x double> %a0) {
 ; SSE-LABEL: test_v16f64_undef:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    mulsd %xmm3, %xmm0
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT:    mulsd %xmm4, %xmm0
 ; SSE-NEXT:    mulsd %xmm5, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT:    mulsd %xmm5, %xmm0
 ; SSE-NEXT:    mulsd %xmm6, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT:    mulsd %xmm6, %xmm0
 ; SSE-NEXT:    mulsd %xmm7, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT:    mulsd %xmm7, %xmm0
 ; SSE-NEXT:    retq
 ;
Index: test/CodeGen/X86/vector-rem.ll
===================================================================
--- test/CodeGen/X86/vector-rem.ll
+++ test/CodeGen/X86/vector-rem.ll
@@ -87,10 +87,10 @@
 ; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; CHECK-NEXT:    callq fmodf
 ; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; CHECK-NEXT:    callq fmodf
 ; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
 ; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
Index: test/CodeGen/X86/vector-shuffle-128-v2.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -151,7 +151,7 @@
 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
 ; SSE-LABEL: shuffle_v2f64_11:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2f64_11:
@@ -208,7 +208,7 @@
 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
 ; SSE-LABEL: shuffle_v2f64_33:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -309,7 +309,7 @@
 define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) {
 ; SSE-LABEL: shuffle_v2f64_3u:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
Index: test/CodeGen/X86/vector-shuffle-combining.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining.ll
+++ test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2222,7 +2222,7 @@
 define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
 ; SSE-LABEL: combine_undef_input_test9:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_undef_input_test9:
@@ -2412,7 +2412,7 @@
 define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
 ; SSE-LABEL: combine_undef_input_test19:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_undef_input_test19:
Index: test/CodeGen/X86/widen_conv-3.ll
===================================================================
--- test/CodeGen/X86/widen_conv-3.ll
+++ test/CodeGen/X86/widen_conv-3.ll
@@ -74,7 +74,7 @@
 ; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X86-SSE2-NEXT:    movss %xmm0, (%eax)
 ; X86-SSE2-NEXT:    movaps %xmm0, %xmm1
-; X86-SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; X86-SSE2-NEXT:    movss %xmm1, 8(%eax)
 ; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT:    movss %xmm0, 4(%eax)
@@ -121,7 +121,7 @@
 ; X64-SSE2-NEXT:    psrad $24, %xmm0
 ; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X64-SSE2-NEXT:    movlps %xmm0, (%rdi)
-; X64-SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X64-SSE2-NEXT:    movss %xmm0, 8(%rdi)
 ; X64-SSE2-NEXT:    retq
 ;
Index: test/CodeGen/X86/widen_conv-4.ll
===================================================================
--- test/CodeGen/X86/widen_conv-4.ll
+++ test/CodeGen/X86/widen_conv-4.ll
@@ -19,7 +19,7 @@
 ; X86-SSE2-NEXT:    movups %xmm0, (%eax)
 ; X86-SSE2-NEXT:    movss %xmm2, 16(%eax)
 ; X86-SSE2-NEXT:    movaps %xmm2, %xmm0
-; X86-SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm2[1],xmm0[1]
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
 ; X86-SSE2-NEXT:    movss %xmm0, 24(%eax)
 ; X86-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; X86-SSE2-NEXT:    movss %xmm2, 20(%eax)
@@ -49,7 +49,7 @@
 ; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X64-SSE2-NEXT:    movlps %xmm0, 16(%rdi)
 ; X64-SSE2-NEXT:    movups %xmm2, (%rdi)
-; X64-SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X64-SSE2-NEXT:    movss %xmm0, 24(%rdi)
 ; X64-SSE2-NEXT:    retq
 ;
@@ -100,7 +100,7 @@
 ; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X86-SSE2-NEXT:    movss %xmm0, (%eax)
 ; X86-SSE2-NEXT:    movaps %xmm0, %xmm1
-; X86-SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
 ; X86-SSE2-NEXT:    movss %xmm1, 8(%eax)
 ; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT:    movss %xmm0, 4(%eax)
@@ -146,7 +146,7 @@
 ; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X64-SSE2-NEXT:    movlps %xmm0, (%rdi)
-; X64-SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X64-SSE2-NEXT:    movss %xmm0, 8(%rdi)
 ; X64-SSE2-NEXT:    retq
 ;