Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -6511,6 +6511,19 @@
     // All domains are valid.
     return 0xe;
+  case X86::MOVHLPSrr:
+    // We can swap domains when both inputs are the same register.
+    // FIXME: This doesn't catch all the cases we would like. If the input
+    // register isn't KILLed by the instruction, the two address instruction
+    // pass puts a COPY on one input. The other input uses the original
+    // register. This prevents the same physical register from being used by
+    // both inputs.
+    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+        MI.getOperand(0).getSubReg() == 0 &&
+        MI.getOperand(1).getSubReg() == 0 &&
+        MI.getOperand(2).getSubReg() == 0)
+      return 0x6;
+    return 0;
   }
   return 0;
 }
@@ -6617,6 +6630,20 @@
     MI.setDesc(get(table[Domain - 1]));
     return true;
   }
+  case X86::UNPCKHPDrr:
+  case X86::MOVHLPSrr:
+    // We just need to commute the instruction which will switch the domains.
+    if (Domain != dom && Domain != 3 &&
+        MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+        MI.getOperand(0).getSubReg() == 0 &&
+        MI.getOperand(1).getSubReg() == 0 &&
+        MI.getOperand(2).getSubReg() == 0) {
+      commuteInstruction(MI, false);
+      return true;
+    }
+    // We must always return true for MOVHLPSrr.
+    if (Opcode == X86::MOVHLPSrr)
+      return true;
   }
   return false;
 }
Index: llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
+++ llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
@@ -78,7 +78,7 @@
 ; CHECK: ## %bb.0: ## %entry
 ; CHECK-NEXT: sqrtsd %xmm0, %xmm1
 ; CHECK-NEXT: cvtsd2ss %xmm1, %xmm2
-; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
 ; CHECK-NEXT: movaps %xmm2, %xmm0
Index: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll
@@ -405,7 +405,7 @@
 ; SSE41-NEXT: pxor %xmm0, %xmm0
 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
 ; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm2[0,0]
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
 ; SSE41-NEXT: movapd %xmm2, %xmm0
 ; SSE41-NEXT: retq
Index: llvm/trunk/test/CodeGen/X86/vec_extract.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract.ll
+++ llvm/trunk/test/CodeGen/X86/vec_extract.ll
@@ -78,7 +78,7 @@
 ; X32: # %bb.0: # %entry
 ; X32-NEXT: subl $12, %esp
 ; X32-NEXT: calll foo
-; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X32-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
 ; X32-NEXT: movsd %xmm0, (%esp)
 ; X32-NEXT: fldl (%esp)
@@ -90,7 +90,7 @@
 ; X64-NEXT: pushq %rax
 ; X64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
 ; X64-NEXT: callq foo
-; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; X64-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
 ; X64-NEXT: popq %rax
 ; X64-NEXT: retq
Index: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
@@ -264,8 +264,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -348,8 +348,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -429,8 +429,8 @@
 ; SSE-NEXT: ucomisd %xmm1, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm1, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -508,8 +508,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
 ; SSE-NEXT: xorq %rcx, %rax
@@ -581,8 +581,8 @@
 ; SSE-NEXT: ucomisd %xmm3, %xmm2
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
-; SSE-NEXT: movaps %xmm2, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: movapd %xmm2, %xmm4
 ; SSE-NEXT: subsd %xmm3, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
@@ -599,8 +599,8 @@
 ; SSE-NEXT: ucomisd %xmm3, %xmm1
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; SSE-NEXT: movaps %xmm1, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: movapd %xmm1, %xmm4
 ; SSE-NEXT: subsd %xmm3, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
@@ -756,8 +756,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm1
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; SSE-NEXT: movaps %xmm1, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: movapd %xmm1, %xmm4
 ; SSE-NEXT: subsd %xmm2, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
@@ -774,8 +774,8 @@
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rcx, %rdx
 ; SSE-NEXT: movq %rdx, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: movaps %xmm0, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm4
 ; SSE-NEXT: subsd %xmm2, %xmm4
 ; SSE-NEXT: cvttsd2si %xmm4, %rcx
 ; SSE-NEXT: xorq %rax, %rcx
Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll
@@ -986,7 +986,7 @@
 ; SSE-LABEL: test_v2f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1011,10 +1011,10 @@
 ; SSE-LABEL: test_v4f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: addsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1049,16 +1049,16 @@
 ; SSE-LABEL: test_v8f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: addsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: addsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: addsd %xmm3, %xmm0
 ; SSE-NEXT: addsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: addsd %xmm4, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1109,28 +1109,28 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: addsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: addsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: addsd %xmm3, %xmm0
 ; SSE-NEXT: addsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: addsd %xmm4, %xmm0
 ; SSE-NEXT: addsd %xmm5, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT: addsd %xmm5, %xmm0
 ; SSE-NEXT: addsd %xmm6, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT: addsd %xmm6, %xmm0
 ; SSE-NEXT: addsd %xmm7, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT: addsd %xmm7, %xmm0
 ; SSE-NEXT: addsd %xmm8, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm8 = xmm8[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm8 = xmm8[1,1]
 ; SSE-NEXT: addsd %xmm8, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1214,7 +1214,7 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: xorpd %xmm1, %xmm1
 ; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1242,10 +1242,10 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: xorpd %xmm2, %xmm2
 ; SSE-NEXT: addsd %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1283,16 +1283,16 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: xorpd %xmm4, %xmm4
 ; SSE-NEXT: addsd %xmm0, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd %xmm4, %xmm0
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: addsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: addsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: addsd %xmm3, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1345,28 +1345,28 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: xorpd %xmm8, %xmm8
 ; SSE-NEXT: addsd %xmm0, %xmm8
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd %xmm8, %xmm0
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: addsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: addsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: addsd %xmm3, %xmm0
 ; SSE-NEXT: addsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: addsd %xmm4, %xmm0
 ; SSE-NEXT: addsd %xmm5, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT: addsd %xmm5, %xmm0
 ; SSE-NEXT: addsd %xmm6, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT: addsd %xmm6, %xmm0
 ; SSE-NEXT: addsd %xmm7, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT: addsd %xmm7, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1450,7 +1450,7 @@
 define double @test_v2f64_undef(<2 x double> %a0) {
 ; SSE-LABEL: test_v2f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1472,10 +1472,10 @@
 define double @test_v4f64_undef(<4 x double> %a0) {
 ; SSE-LABEL: test_v4f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1507,16 +1507,16 @@
 define double @test_v8f64_undef(<8 x double> %a0) {
 ; SSE-LABEL: test_v8f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: addsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: addsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: addsd %xmm3, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1563,28 +1563,28 @@
 define double @test_v16f64_undef(<16 x double> %a0) {
 ; SSE-LABEL: test_v16f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: addsd %xmm1, %xmm0
 ; SSE-NEXT: addsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: addsd %xmm2, %xmm0
 ; SSE-NEXT: addsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: addsd %xmm3, %xmm0
 ; SSE-NEXT: addsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: addsd %xmm4, %xmm0
 ; SSE-NEXT: addsd %xmm5, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT: addsd %xmm5, %xmm0
 ; SSE-NEXT: addsd %xmm6, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT: addsd %xmm6, %xmm0
 ; SSE-NEXT: addsd %xmm7, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT: addsd %xmm7, %xmm0
 ; SSE-NEXT: retq
 ;
Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll
@@ -956,7 +956,7 @@
 ; SSE-LABEL: test_v2f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -981,10 +981,10 @@
 ; SSE-LABEL: test_v4f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: mulsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: mulsd %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1019,16 +1019,16 @@
 ; SSE-LABEL: test_v8f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: mulsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: mulsd %xmm2, %xmm0
 ; SSE-NEXT: mulsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: mulsd %xmm3, %xmm0
 ; SSE-NEXT: mulsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: mulsd %xmm4, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1079,28 +1079,28 @@
 ; SSE: # %bb.0:
 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: mulsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: mulsd %xmm2, %xmm0
 ; SSE-NEXT: mulsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: mulsd %xmm3, %xmm0
 ; SSE-NEXT: mulsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: mulsd %xmm4, %xmm0
 ; SSE-NEXT: mulsd %xmm5, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT: mulsd %xmm5, %xmm0
 ; SSE-NEXT: mulsd %xmm6, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT: mulsd %xmm6, %xmm0
 ; SSE-NEXT: mulsd %xmm7, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT: mulsd %xmm7, %xmm0
 ; SSE-NEXT: mulsd %xmm8, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm8 = xmm8[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm8 = xmm8[1,1]
 ; SSE-NEXT: mulsd %xmm8, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1210,7 +1210,7 @@
 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
 ; SSE-NEXT: mulsd %xmm0, %xmm2
 ; SSE-NEXT: mulsd %xmm1, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm2
 ; SSE-NEXT: movapd %xmm2, %xmm0
 ; SSE-NEXT: retq
@@ -1247,13 +1247,13 @@
 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
 ; SSE-NEXT: mulsd %xmm0, %xmm4
 ; SSE-NEXT: mulsd %xmm1, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm4
 ; SSE-NEXT: mulsd %xmm2, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: mulsd %xmm2, %xmm4
 ; SSE-NEXT: mulsd %xmm3, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: mulsd %xmm3, %xmm4
 ; SSE-NEXT: movapd %xmm4, %xmm0
 ; SSE-NEXT: retq
@@ -1301,29 +1301,29 @@
 define double @test_v16f64_one(<16 x double> %a0) {
 ; SSE-LABEL: test_v16f64_one:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm8
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movapd %xmm0, %xmm8
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: mulsd %xmm8, %xmm0
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: mulsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: mulsd %xmm2, %xmm0
 ; SSE-NEXT: mulsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: mulsd %xmm3, %xmm0
 ; SSE-NEXT: mulsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: mulsd %xmm4, %xmm0
 ; SSE-NEXT: mulsd %xmm5, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT: mulsd %xmm5, %xmm0
 ; SSE-NEXT: mulsd %xmm6, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT: mulsd %xmm6, %xmm0
 ; SSE-NEXT: mulsd %xmm7, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT: mulsd %xmm7, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1403,7 +1403,7 @@
 define double @test_v2f64_undef(<2 x double> %a0) {
 ; SSE-LABEL: test_v2f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1425,10 +1425,10 @@
 define double @test_v4f64_undef(<4 x double> %a0) {
 ; SSE-LABEL: test_v4f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1460,16 +1460,16 @@
 define double @test_v8f64_undef(<8 x double> %a0) {
 ; SSE-LABEL: test_v8f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: mulsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: mulsd %xmm2, %xmm0
 ; SSE-NEXT: mulsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: mulsd %xmm3, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -1516,28 +1516,28 @@
 define double @test_v16f64_undef(<16 x double> %a0) {
 ; SSE-LABEL: test_v16f64_undef:
 ; SSE: # %bb.0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: mulsd %xmm1, %xmm0
 ; SSE-NEXT: mulsd %xmm2, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: mulsd %xmm2, %xmm0
 ; SSE-NEXT: mulsd %xmm3, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: mulsd %xmm3, %xmm0
 ; SSE-NEXT: mulsd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: mulsd %xmm4, %xmm0
 ; SSE-NEXT: mulsd %xmm5, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1,1]
 ; SSE-NEXT: mulsd %xmm5, %xmm0
 ; SSE-NEXT: mulsd %xmm6, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1,1]
 ; SSE-NEXT: mulsd %xmm6, %xmm0
 ; SSE-NEXT: mulsd %xmm7, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm7 = xmm7[1,1]
 ; SSE-NEXT: mulsd %xmm7, %xmm0
 ; SSE-NEXT: retq
 ;
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -151,7 +151,7 @@
 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
 ; SSE-LABEL: shuffle_v2f64_11:
 ; SSE: # %bb.0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2f64_11:
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2222,7 +2222,7 @@
 define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
 ; SSE-LABEL: combine_undef_input_test9:
 ; SSE: # %bb.0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_undef_input_test9:
@@ -2412,7 +2412,7 @@
 define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
 ; SSE-LABEL: combine_undef_input_test19:
 ; SSE: # %bb.0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_undef_input_test19: