diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -740,6 +740,15 @@ setOperationAction(ISD::FEXP2, VT, Expand); } + // Handle constrained floating-point operations of vector types. + for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, + MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { + setOperationAction(ISD::STRICT_FADD, VT, Legal); + setOperationAction(ISD::STRICT_FSUB, VT, Legal); + setOperationAction(ISD::STRICT_FMUL, VT, Legal); + setOperationAction(ISD::STRICT_FDIV, VT, Legal); + } + // First set operation action for all vector types to either promote // (for widening) or expand (for scalarization). Then we will selectively // turn on ones that can be effectively codegen'd. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5391,13 +5391,13 @@ NAME#"SD">, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } -defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds, +defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, SchedWriteFAddSizes, 1>, SIMD_EXC; -defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds, +defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, SchedWriteFMulSizes, 1>, SIMD_EXC; -defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds, +defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, SchedWriteFAddSizes, 0>, SIMD_EXC; -defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds, +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, SchedWriteFDivSizes, 0>, SIMD_EXC; defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, SchedWriteFCmpSizes, 0>; @@ -5546,16 +5546,16 @@ EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } -defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, +defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512, SchedWriteFAddSizes, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>, SIMD_EXC; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, +defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512, SchedWriteFMulSizes, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>, SIMD_EXC; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512, SchedWriteFAddSizes>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>, SIMD_EXC; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512, SchedWriteFDivSizes>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>, SIMD_EXC; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2621,18 +2621,18 @@ } // Binary Arithmetic instructions -defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>, +defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>, 
basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>, SIMD_EXC; -defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>, - basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>, +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>, + basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>, basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>, SIMD_EXC; let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>, + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>, basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>, SIMD_EXC; - defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>, - basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>, + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>, + basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>, basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>, SIMD_EXC; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, @@ -2727,15 +2727,15 @@ } } -defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; -defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; -defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; -defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; +defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; +defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; +defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; +defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>; -defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; -defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; -defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; -defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; +defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; +defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; +defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; +defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-128.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX + +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float>
@llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) + +define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: f1: +; SSE: # %bb.0: +; SSE-NEXT: addpd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f1: +; AVX: # %bb.0: +; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f2(<4 x float> %a, <4 x float> %b) #0 { +; SSE-LABEL: f2: +; SSE: # %bb.0: +; SSE-NEXT: addps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f2: +; AVX: # %bb.0: +; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <2 x double> @f3(<2 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: f3: +; SSE: # %bb.0: +; SSE-NEXT: subpd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f3: +; AVX: # %bb.0: +; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f4(<4 x float> %a, <4 x float> %b) #0 { +; SSE-LABEL: f4: +; SSE: # %bb.0: +; SSE-NEXT: subps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f4: +; AVX: # %bb.0: +; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <2 x double> @f5(<2 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: f5: +; SSE: # %bb.0: +; SSE-NEXT: mulpd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f5: +; AVX: # %bb.0: +; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f6(<4 x float> %a, <4 x float> %b) #0 { +; SSE-LABEL: f6: +; SSE: # %bb.0: +; SSE-NEXT: mulps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f6: +; AVX: # %bb.0: +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <2 x double> @f7(<2 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: f7: +; SSE: # %bb.0: +; SSE-NEXT: divpd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f7: +; AVX: # %bb.0: +; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f8(<4 x float> %a, <4 x float> %b) #0 { +; 
SSE-LABEL: f8: +; SSE: # %bb.0: +; SSE-NEXT: divps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: f8: +; AVX: # %bb.0: +; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-256.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s + +declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fsub.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float>, <8 x float>, metadata, metadata) + +define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f2(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <4 x double> @f3(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f4(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fsub.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <4 x double> @f5(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: 
ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f6(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <4 x double> @f7(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f8(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-512.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s + +declare <8 x double> @llvm.experimental.constrained.fadd.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fsub.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fsub.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fmul.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fmul.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fdiv.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fdiv.v16f32(<16 x float>, <16 x float>, metadata, metadata) + +define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fadd.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f2(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} + +define <8 x double> @f3(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsubpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fsub.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f4(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fsub.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} + +define <8 x double> @f5(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fmul.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f6(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fmul.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} + +define <8 x double> @f7(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fdiv.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f8(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fdiv.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -113,10 +113,10 @@ ; CHECK-LABEL: constrained_vector_fdiv_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] -; CHECK-NEXT: divpd %xmm2, %xmm0 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0] ; CHECK-NEXT: divpd %xmm2, %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] +; CHECK-NEXT: divpd %xmm2, %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fdiv_v4f64: @@ -498,10 +498,10 @@ define <4 x double> @constrained_vector_fmul_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fmul_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [2.0E+0,3.0E+0] -; CHECK-NEXT: mulpd %xmm1, %xmm0 -; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0] +; CHECK-NEXT: mulpd %xmm0, %xmm1 +; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fmul_v4f64: @@ -544,19 +544,14 @@ define <2 x double> 
@constrained_vector_fadd_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: addsd %xmm0, %xmm1 -; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq entry: %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( @@ -603,24 +598,22 @@ define <3 x double> @constrained_vector_fadd_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm2, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v3f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm2 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -636,28 +629,16 @@ define <4 x double> @constrained_vector_fadd_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1] +; CHECK-NEXT: addpd %xmm0, %xmm1 +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v4f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 -; 
AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: retq entry: %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64( @@ -694,19 +675,14 @@ define <2 x double> @constrained_vector_fsub_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fsub_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( @@ -756,12 +732,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd %xmm0, %xmm2 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; @@ -770,9 +746,8 @@ ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm2 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -788,28 +763,16 @@ define <4 x double> @constrained_vector_fsub_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fsub_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v4f64: ; AVX: # %bb.0: # %entry -; 
AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: retq entry: %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(