Index: llvm/lib/Target/X86/X86InstrFMA.td =================================================================== --- llvm/lib/Target/X86/X86InstrFMA.td +++ llvm/lib/Target/X86/X86InstrFMA.td @@ -223,9 +223,12 @@ multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, string OpStr, Intrinsic IntF32, Intrinsic IntF64, SDNode OpNode> { + let ExeDomain = SSEPackedSingle in defm SS : fma3s_forms, fma3s_int_forms; + + let ExeDomain = SSEPackedDouble in defm SD : fma3s_forms, fma3s_int_forms, Index: llvm/test/CodeGen/X86/fma-commute-x86.ll =================================================================== --- llvm/test/CodeGen/X86/fma-commute-x86.ll +++ llvm/test/CodeGen/X86/fma-commute-x86.ll @@ -8,8 +8,8 @@ define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; CHECK-LABEL: test_x86_fmadd_baa_ss: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind @@ -102,8 +102,8 @@ define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fmadd_baa_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind @@ -113,7 +113,7 @@ define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fmadd_aba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rcx), %xmm0 +; CHECK-NEXT: vmovapd (%rcx), %xmm0 ; CHECK-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind @@ -123,7 +123,7 @@ define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fmadd_bba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdx), %xmm0 +; CHECK-NEXT: vmovapd (%rdx), %xmm0 ; CHECK-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind @@ -197,8 +197,8 @@ define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; CHECK-LABEL: test_x86_fnmadd_baa_ss: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind @@ -291,8 +291,8 @@ define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fnmadd_baa_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind @@ -302,7 +302,7 @@ define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fnmadd_aba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rcx), %xmm0 +; CHECK-NEXT: vmovapd (%rcx), %xmm0 ; CHECK-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind @@ -312,7 +312,7 @@ define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fnmadd_bba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdx), %xmm0 +; CHECK-NEXT: vmovapd (%rdx), %xmm0 ; CHECK-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind @@ -386,8 +386,8 @@ define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; CHECK-LABEL: test_x86_fmsub_baa_ss: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind @@ -480,8 +480,8 @@ define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fmsub_baa_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind @@ -491,7 +491,7 @@ define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fmsub_aba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rcx), %xmm0 +; CHECK-NEXT: vmovapd (%rcx), %xmm0 ; CHECK-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind @@ -501,7 +501,7 @@ define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fmsub_bba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdx), %xmm0 +; CHECK-NEXT: vmovapd (%rdx), %xmm0 ; CHECK-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind @@ -575,8 +575,8 @@ define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; CHECK-LABEL: test_x86_fnmsub_baa_ss: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind @@ -669,8 +669,8 @@ define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fnmsub_baa_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} -; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} ; CHECK-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind @@ -680,7 +680,7 @@ define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fnmsub_aba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rcx), %xmm0 +; CHECK-NEXT: vmovapd (%rcx), %xmm0 ; CHECK-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind @@ -690,7 +690,7 @@ define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_x86_fnmsub_bba_sd: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdx), %xmm0 +; CHECK-NEXT: vmovapd (%rdx), %xmm0 ; CHECK-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind Index: llvm/test/CodeGen/X86/fma-intrinsics-x86.ll =================================================================== --- llvm/test/CodeGen/X86/fma-intrinsics-x86.ll +++ llvm/test/CodeGen/X86/fma-intrinsics-x86.ll @@ -10,8 +10,8 @@ ; CHECK-LABEL: test_x86_fma_vfmadd_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 @@ -27,8 +27,8 @@ ; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 @@ -45,8 +45,8 @@ ; CHECK-LABEL: test_x86_fma_vfmadd_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 @@ -62,12 +62,12 @@ ; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 -; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 ; ; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 ; @@ -171,8 +171,8 @@ ; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 @@ -190,8 +190,8 @@ ; CHECK-LABEL: test_x86_fma_vfmsub_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 @@ -207,12 +207,12 @@ ; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 -; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 ; ; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0 ; @@ -299,8 +299,8 @@ ; CHECK-LABEL: test_x86_fma_vfnmadd_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 @@ -316,8 +316,8 @@ ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 @@ -335,8 +335,8 @@ ; CHECK-LABEL: test_x86_fma_vfnmadd_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 @@ -352,12 +352,12 @@ ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 -; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 ; ; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0 ; @@ -444,8 +444,8 @@ ; CHECK-LABEL: test_x86_fma_vfnmsub_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 @@ -461,8 +461,8 @@ ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 @@ -480,8 +480,8 @@ ; CHECK-LABEL: test_x86_fma_vfnmsub_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 @@ -497,12 +497,12 @@ ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 -; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 ; ; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0 ; Index: llvm/test/CodeGen/X86/fma-scalar-memfold.ll =================================================================== --- llvm/test/CodeGen/X86/fma-scalar-memfold.ll +++ llvm/test/CodeGen/X86/fma-scalar-memfold.ll @@ -216,7 +216,7 @@ ; CHECK-LABEL: fmadd_aab_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -237,7 +237,7 @@ ; CHECK-LABEL: fmadd_aba_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -258,7 +258,7 @@ ; CHECK-LABEL: fmsub_aab_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -279,7 +279,7 @@ ; CHECK-LABEL: fmsub_aba_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -300,7 +300,7 @@ ; CHECK-LABEL: fnmadd_aab_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -321,7 +321,7 @@ ; CHECK-LABEL: fnmadd_aba_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -342,7 +342,7 @@ ; CHECK-LABEL: fnmsub_aab_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -363,7 +363,7 @@ ; CHECK-LABEL: fnmsub_aba_sd: ; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]] ; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]] -; CHECK-NEXT: vmovlps %[[XMM]], (%rcx) +; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx) ; CHECK-NEXT: ret %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 Index: llvm/test/CodeGen/X86/fma_patterns.ll =================================================================== --- llvm/test/CodeGen/X86/fma_patterns.ll +++ llvm/test/CodeGen/X86/fma_patterns.ll @@ -1114,7 +1114,7 @@ define double @test_f64_fneg_fmul(double %x, double %y) #0 { ; FMA-LABEL: test_f64_fneg_fmul: ; FMA: # BB#0: -; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 ; FMA-NEXT: retq ;