Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -3566,8 +3566,19 @@
     case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
     case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
     }
+
+    // MOVSD/MOVSS's 2nd source operand is FR64/FR32, but BLENDPD/BLENDPS need
+    // VR128: COPY it into a new vreg of the 1st source's class and use that.
+    auto &MRI = MI.getParent()->getParent()->getRegInfo();
+    auto VR128RC = MRI.getRegClass(MI.getOperand(1).getReg());
+    unsigned VR128 = MRI.createVirtualRegister(VR128RC);
+    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY),
+            VR128)
+        .addReg(MI.getOperand(2).getReg());
+
     auto &WorkingMI = cloneIfNew(MI);
     WorkingMI.setDesc(get(Opc));
+    WorkingMI.getOperand(2).setReg(VR128);
     WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
Index: llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
+++ llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
+
+; PR30607
+
+define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
+; SSE2-LABEL: insert_f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: insert_f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_f64:
+; AVX: # BB#0:
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: insert_f64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: retq
+  %1 = insertelement <2 x double> %a1, double %a0, i32 0
+  ret <2 x double> %1
+}
+
+define <4 x float> @insert_f32(float %a0, <4 x float> %a1) {
+; SSE2-LABEL: insert_f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: insert_f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_f32:
+; AVX: # BB#0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: insert_f32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: retq
+  %1 = insertelement <4 x float> %a1, float %a0, i32 0
+  ret <4 x float> %1
+}