Index: llvm/trunk/lib/Target/X86/X86InstrFormats.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFormats.td
+++ llvm/trunk/lib/Target/X86/X86InstrFormats.td
@@ -845,7 +845,7 @@
 class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag>pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, T8PD,
-        VEX_4V, FMASC, Requires<[HasFMA]>;
+        VEX_4V, FMASC, Requires<[HasFMA, NoVLX]>;

 // FMA4 Instruction Templates
 class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
Index: llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
+++ llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
@@ -575,3 +575,9 @@
   %C = zext <8 x i8> %B to <8 x i16>
   ret <8 x i16> %C
 }
+
+define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  %x = fmul <4 x float> %a0, %a1
+  %res = fsub <4 x float> %x, %a2
+  ret <4 x float> %res
+}
Index: llvm/trunk/test/CodeGen/X86/avx512-fma.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-fma.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-fma.ll
@@ -78,16 +78,11 @@
 }

 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
-; KNL-LABEL: test_x86_fmsub_213_m:
-; KNL: ## BB#0:
-; KNL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
-; KNL-NEXT: vmovaps %zmm1, %zmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_x86_fmsub_213_m:
-; SKX: ## BB#0:
-; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0
-; SKX-NEXT: retq
+; ALL-LABEL: test_x86_fmsub_213_m:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
+; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: retq
   %a2 = load double , double *%a2_ptr
   %x = fmul double %a0, %a1
   %res = fsub double %x, %a2
Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -1508,7 +1508,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
-  ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
+  ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07]
   %a2 = load <4 x float>, <4 x float>* %ptr_a2
   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   ret <4 x float> %res
@@ -1516,7 +1516,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
-  ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
+  ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07]
   %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   ret <4 x float> %res
@@ -1594,7 +1594,7 @@
 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
-  ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
+  ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0x07]
   %a2 = load <2 x double>, <2 x double>* %ptr_a2
   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
   ret <2 x double> %res
@@ -1624,7 +1624,7 @@
 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
-  ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
+  ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0x07]
   %a2 = load <4 x double>, <4 x double>* %ptr_a2
   %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
   ret <4 x double> %res