diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1229,8 +1229,10 @@
 
   if (Subtarget.hasAnyFMA()) {
     for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
-                     MVT::v2f64, MVT::v4f64 })
+                     MVT::v2f64, MVT::v4f64 }) {
       setOperationAction(ISD::FMA, VT, Legal);
+      setOperationAction(ISD::STRICT_FMA, VT, Legal);
+    }
   }
 
   for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1423,6 +1425,7 @@
     setOperationAction(ISD::FNEG, VT, Custom);
     setOperationAction(ISD::FABS, VT, Custom);
     setOperationAction(ISD::FMA, VT, Legal);
+    setOperationAction(ISD::STRICT_FMA, VT, Legal);
     setOperationAction(ISD::FCOPYSIGN, VT, Custom);
   }
 
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -493,7 +493,11 @@
 def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
 def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
 
-def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86fma : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_fma : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+                        [(X86strict_fma node:$src1, node:$src2, node:$src3),
+                         (X86fma node:$src1, node:$src2, node:$src3)]>;
 def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
 def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
 def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
--- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE,SSE-X64
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86,AVX512-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64,AVX512-X64
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X87
 
 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
@@ -19,6 +19,8 @@
 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
 declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
 
 define double @fadd_f64(double %a, double %b) nounwind strictfp {
 ; SSE-X86-LABEL: fadd_f64:
@@ -579,4 +581,118 @@
   ret void
 }
 
+define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
+; SSE-X86-LABEL: fma_f64:
+; SSE-X86:       # %bb.0:
+; SSE-X86-NEXT:    subl $24, %esp
+; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-X86-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-X86-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-X86-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT:    movsd %xmm1, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT:    movsd %xmm0, (%esp)
+; SSE-X86-NEXT:    calll fma
+; SSE-X86-NEXT:    addl $24, %esp
+; SSE-X86-NEXT:    retl
+;
+; SSE-X64-LABEL: fma_f64:
+; SSE-X64:       # %bb.0:
+; SSE-X64-NEXT:    pushq %rax
+; SSE-X64-NEXT:    callq fma
+; SSE-X64-NEXT:    popq %rax
+; SSE-X64-NEXT:    retq
+;
+; AVX512-X86-LABEL: fma_f64:
+; AVX512-X86:       # %bb.0:
+; AVX512-X86-NEXT:    pushl %ebp
+; AVX512-X86-NEXT:    movl %esp, %ebp
+; AVX512-X86-NEXT:    andl $-8, %esp
+; AVX512-X86-NEXT:    subl $8, %esp
+; AVX512-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512-X86-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX512-X86-NEXT:    vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
+; AVX512-X86-NEXT:    vmovsd %xmm1, (%esp)
+; AVX512-X86-NEXT:    fldl (%esp)
+; AVX512-X86-NEXT:    movl %ebp, %esp
+; AVX512-X86-NEXT:    popl %ebp
+; AVX512-X86-NEXT:    retl
+;
+; AVX512-X64-LABEL: fma_f64:
+; AVX512-X64:       # %bb.0:
+; AVX512-X64-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; AVX512-X64-NEXT:    retq
+;
+; X87-LABEL: fma_f64:
+; X87:       # %bb.0:
+; X87-NEXT:    subl $24, %esp
+; X87-NEXT:    fldl {{[0-9]+}}(%esp)
+; X87-NEXT:    fldl {{[0-9]+}}(%esp)
+; X87-NEXT:    fldl {{[0-9]+}}(%esp)
+; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    calll fma
+; X87-NEXT:    addl $24, %esp
+; X87-NEXT:    retl
+  %res = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c,
+                                                            metadata !"round.dynamic",
+                                                            metadata !"fpexcept.strict") #0
+  ret double %res
+}
+
+define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
+; SSE-X86-LABEL: fma_f32:
+; SSE-X86:       # %bb.0:
+; SSE-X86-NEXT:    subl $12, %esp
+; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-X86-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-X86-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT:    movss %xmm0, (%esp)
+; SSE-X86-NEXT:    calll fmaf
+; SSE-X86-NEXT:    addl $12, %esp
+; SSE-X86-NEXT:    retl
+;
+; SSE-X64-LABEL: fma_f32:
+; SSE-X64:       # %bb.0:
+; SSE-X64-NEXT:    pushq %rax
+; SSE-X64-NEXT:    callq fmaf
+; SSE-X64-NEXT:    popq %rax
+; SSE-X64-NEXT:    retq
+;
+; AVX512-X86-LABEL: fma_f32:
+; AVX512-X86:       # %bb.0:
+; AVX512-X86-NEXT:    pushl %eax
+; AVX512-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512-X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512-X86-NEXT:    vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
+; AVX512-X86-NEXT:    vmovss %xmm1, (%esp)
+; AVX512-X86-NEXT:    flds (%esp)
+; AVX512-X86-NEXT:    popl %eax
+; AVX512-X86-NEXT:    retl
+;
+; AVX512-X64-LABEL: fma_f32:
+; AVX512-X64:       # %bb.0:
+; AVX512-X64-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; AVX512-X64-NEXT:    retq
+;
+; X87-LABEL: fma_f32:
+; X87:       # %bb.0:
+; X87-NEXT:    subl $12, %esp
+; X87-NEXT:    flds {{[0-9]+}}(%esp)
+; X87-NEXT:    flds {{[0-9]+}}(%esp)
+; X87-NEXT:    flds {{[0-9]+}}(%esp)
+; X87-NEXT:    fstps {{[0-9]+}}(%esp)
+; X87-NEXT:    fstps {{[0-9]+}}(%esp)
+; X87-NEXT:    fstps (%esp)
+; X87-NEXT:    calll fmaf
+; X87-NEXT:    addl $12, %esp
+; X87-NEXT:    retl
+  %res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c,
+                                                           metadata !"round.dynamic",
+                                                           metadata !"fpexcept.strict") #0
+  ret float %res
+}
+
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -1,8 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
 
@@ -18,6 +17,8 @@
 declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
 
 define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 {
 ; SSE-LABEL: f1:
@@ -217,4 +218,40 @@
   ret <2 x double> %res
 }
 
+define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+; SSE-LABEL: f13:
+; SSE:       # %bb.0:
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       ret{{[l|q]}}
+;
+; AVX-LABEL: f13:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; AVX-NEXT:    ret{{[l|q]}}
+  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c,
+                                                                   metadata !"round.dynamic",
+                                                                   metadata !"fpexcept.strict") #0
+  ret <4 x float> %res
+}
+
+define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+; SSE-LABEL: f14:
+; SSE:       # %bb.0:
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       ret{{[l|q]}}
+;
+; AVX-LABEL: f14:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; AVX-NEXT:    ret{{[l|q]}}
+  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c,
+                                                                    metadata !"round.dynamic",
+                                                                    metadata !"fpexcept.strict") #0
+  ret <2 x double> %res
+}
+
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll
--- a/llvm/test/CodeGen/X86/vec-strict-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-256.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s
@@ -16,6 +15,8 @@
 declare <8 x float> @llvm.experimental.constrained.sqrt.v8f32(<8 x float>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
 declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)
 
 define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 {
 ; CHECK-LABEL: f1:
@@ -154,4 +155,46 @@
   ret <4 x float> %ret
 }
 
+define <8 x float> @f13(<8 x float> %a, <8 x float> %b, <8 x float> %c) #0 {
+; SSE-LABEL: f13:
+; SSE:       # %bb.0:
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       ret{{[l|q]}}
+;
+; AVX-LABEL: f13:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
+; AVX-NEXT:    ret{{[l|q]}}
+  %res = call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c,
+                                                                   metadata !"round.dynamic",
+                                                                   metadata !"fpexcept.strict") #0
+  ret <8 x float> %res
+}
+
+define <4 x double> @f14(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {
+; SSE-LABEL: f14:
+; SSE:       # %bb.0:
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       ret{{[l|q]}}
+;
+; AVX-LABEL: f14:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
+; AVX-NEXT:    ret{{[l|q]}}
+  %res = call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c,
+                                                                    metadata !"round.dynamic",
+                                                                    metadata !"fpexcept.strict") #0
+  ret <4 x double> %res
+}
+
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll
--- a/llvm/test/CodeGen/X86/vec-strict-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-512.ll
@@ -14,6 +14,8 @@
 declare <16 x float> @llvm.experimental.constrained.sqrt.v16f32(<16 x float>, metadata, metadata)
 declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float>, metadata)
 declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)
+declare <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, metadata, metadata)
+declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata)
 
 define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 {
 ; CHECK-LABEL: f1:
@@ -151,4 +153,58 @@
   ret <8 x float> %ret
 }
 
+define <16 x float> @f13(<16 x float> %a, <16 x float> %b, <16 x float> %c) #0 {
+; SSE-LABEL: f13:
+; SSE:       # %bb.0:
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       call{{[l|q]}} fmaf
+; SSE:       ret{{[l|q]}}
+;
+; AVX-LABEL: f13:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; AVX-NEXT:    ret{{[l|q]}}
+  %res = call <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c,
+                                                                     metadata !"round.dynamic",
+                                                                     metadata !"fpexcept.strict") #0
+  ret <16 x float> %res
+}
+
+define <8 x double> @f14(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {
+; SSE-LABEL: f14:
+; SSE:       # %bb.0:
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       call{{[l|q]}} fma
+; SSE:       ret{{[l|q]}}
+;
+; AVX-LABEL: f14:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; AVX-NEXT:    ret{{[l|q]}}
+  %res = call <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c,
+                                                                    metadata !"round.dynamic",
+                                                                    metadata !"fpexcept.strict") #0
+  ret <8 x double> %res
+}
+
 attributes #0 = { strictfp }