Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -602,6 +602,15 @@
     setOperationAction(ISD::FSIN , VT, Expand);
     setOperationAction(ISD::FCOS , VT, Expand);
     setOperationAction(ISD::FSINCOS, VT, Expand);
+
+    // Handle constrained floating-point operations of scalar.
+    setOperationAction(ISD::STRICT_FADD , VT, Legal);
+    setOperationAction(ISD::STRICT_FSUB , VT, Legal);
+    setOperationAction(ISD::STRICT_FMUL , VT, Legal);
+    setOperationAction(ISD::STRICT_FDIV , VT, Legal);
+    setOperationAction(ISD::STRICT_FSQRT , VT, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND , VT, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
   }
 }
@@ -665,6 +674,15 @@
     setOperationAction(ISD::LLROUND, MVT::f80, Expand);
     setOperationAction(ISD::LRINT, MVT::f80, Expand);
     setOperationAction(ISD::LLRINT, MVT::f80, Expand);
+
+    // Handle constrained floating-point operations of scalar.
+    setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
   }
 
   // f128 uses xmm registers, but most operations require libcalls.
Index: llvm/lib/Target/X86/X86InstrFPStack.td
===================================================================
--- llvm/lib/Target/X86/X86InstrFPStack.td
+++ llvm/lib/Target/X86/X86InstrFPStack.td
@@ -286,26 +286,26 @@
 // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
 // resources.
 let hasNoSchedulingInfo = 1 in {
-defm ADD : FPBinary_rr<fadd>;
-defm SUB : FPBinary_rr<fsub>;
-defm MUL : FPBinary_rr<fmul>;
-defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary_rr<any_fadd>;
+defm SUB : FPBinary_rr<any_fsub>;
+defm MUL : FPBinary_rr<any_fmul>;
+defm DIV : FPBinary_rr<any_fdiv>;
 }
 
 // Sets the scheduling resources for the actual NAME#_F<size>m defintions.
 let SchedRW = [WriteFAddLd] in {
-defm ADD : FPBinary<fadd, MRM0m, "add">;
-defm SUB : FPBinary<fsub, MRM4m, "sub">;
-defm SUBR: FPBinary<fsub, MRM5m, "subr", 0>;
+defm ADD : FPBinary<any_fadd, MRM0m, "add">;
+defm SUB : FPBinary<any_fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<any_fsub, MRM5m, "subr", 0>;
 }
 
 let SchedRW = [WriteFMulLd] in {
-defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm MUL : FPBinary<any_fmul, MRM1m, "mul">;
 }
 
 let SchedRW = [WriteFDivLd] in {
-defm DIV : FPBinary<fdiv, MRM6m, "div">;
-defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
+defm DIV : FPBinary<any_fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<any_fdiv, MRM7m, "divr", 0>;
 }
 } // Uses = [FPCW], mayRaiseFPException = 1
@@ -366,7 +366,7 @@
 let Uses = [FPCW], mayRaiseFPException = 1 in {
 let SchedRW = [WriteFSqrt80] in
-defm SQRT: FPUnary<fsqrt, MRM_FA, "fsqrt">;
+defm SQRT: FPUnary<any_fsqrt, MRM_FA, "fsqrt">;
 
 let SchedRW = [WriteFCom] in {
 let hasSideEffects = 0 in {
@@ -790,19 +790,19 @@
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
-def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
           Requires<[FPStackf64]>;
 
 // FP truncations map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
We have validated that only value-preserving truncations make // it through isel. -def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, +def : Pat<(f32 (any_fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, +def : Pat<(f32 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, +def : Pat<(f64 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, Requires<[FPStackf64]>; Index: llvm/test/CodeGen/X86/fp-strict-scalar.ll =================================================================== --- llvm/test/CodeGen/X86/fp-strict-scalar.ll +++ llvm/test/CodeGen/X86/fp-strict-scalar.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=X87 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) @@ -15,19 +15,23 @@ declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) +declare float @llvm.experimental.constrained.fptrunc.f64.f32(double, metadata, metadata) +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) +declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) -define x86_regcallcc double @f1(double %a, double %b) #0 { -; SSE-LABEL: f1: +define x86_regcallcc double @fadd1(double %a, double %b) #0 { +; SSE-LABEL: fadd1: ; SSE: # %bb.0: ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f1: +; AVX-LABEL: fadd1: ; AVX: # %bb.0: ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f1: +; X87-LABEL: fadd1: ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: faddl {{[0-9]+}}(%esp) @@ -38,18 +42,18 @@ ret double %ret } -define x86_regcallcc float @f2(float %a, float %b) #0 { -; SSE-LABEL: f2: +define x86_regcallcc float @fadd2(float %a, float %b) #0 { +; SSE-LABEL: fadd2: ; SSE: # %bb.0: ; SSE-NEXT: addss %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f2: +; AVX-LABEL: fadd2: ; AVX: # %bb.0: ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f2: +; X87-LABEL: fadd2: ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fadds {{[0-9]+}}(%esp) @@ -60,18 +64,18 @@ ret float %ret } -define x86_regcallcc double @f3(double %a, double %b) #0 { -; SSE-LABEL: f3: +define x86_regcallcc double @fsub1(double %a, double %b) #0 { +; SSE-LABEL: fsub1: ; SSE: # %bb.0: ; SSE-NEXT: subsd %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f3: +; AVX-LABEL: fsub1: ; AVX: # %bb.0: ; AVX-NEXT: 
vsubsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f3: +; X87-LABEL: fsub1: ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fsubl {{[0-9]+}}(%esp) @@ -82,18 +86,18 @@ ret double %ret } -define x86_regcallcc float @f4(float %a, float %b) #0 { -; SSE-LABEL: f4: +define x86_regcallcc float @fsub2(float %a, float %b) #0 { +; SSE-LABEL: fsub2: ; SSE: # %bb.0: ; SSE-NEXT: subss %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f4: +; AVX-LABEL: fsub2: ; AVX: # %bb.0: ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f4: +; X87-LABEL: fsub2: ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fsubs {{[0-9]+}}(%esp) @@ -104,18 +108,18 @@ ret float %ret } -define x86_regcallcc double @f5(double %a, double %b) #0 { -; SSE-LABEL: f5: +define x86_regcallcc double @fmul1(double %a, double %b) #0 { +; SSE-LABEL: fmul1: ; SSE: # %bb.0: ; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f5: +; AVX-LABEL: fmul1: ; AVX: # %bb.0: ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f5: +; X87-LABEL: fmul1: ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fmull {{[0-9]+}}(%esp) @@ -126,18 +130,18 @@ ret double %ret } -define x86_regcallcc float @f6(float %a, float %b) #0 { -; SSE-LABEL: f6: +define x86_regcallcc float @fmul2(float %a, float %b) #0 { +; SSE-LABEL: fmul2: ; SSE: # %bb.0: ; SSE-NEXT: mulss %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f6: +; AVX-LABEL: fmul2: ; AVX: # %bb.0: ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f6: +; X87-LABEL: fmul2: ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fmuls {{[0-9]+}}(%esp) @@ -148,18 +152,18 @@ ret float %ret } -define x86_regcallcc double @f7(double %a, double %b) #0 { -; SSE-LABEL: f7: +define x86_regcallcc double @fdiv1(double %a, double %b) #0 { +; SSE-LABEL: fdiv1: ; SSE: # %bb.0: ; SSE-NEXT: divsd %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f7: +; AVX-LABEL: fdiv1: ; AVX: # %bb.0: ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f7: +; X87-LABEL: fdiv1: ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fdivl {{[0-9]+}}(%esp) @@ -170,18 +174,18 @@ ret double %ret } -define x86_regcallcc float @f8(float %a, float %b) #0 { -; SSE-LABEL: f8: +define x86_regcallcc float @fdiv2(float %a, float %b) #0 { +; SSE-LABEL: fdiv2: ; SSE: # %bb.0: ; SSE-NEXT: divss %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; -; AVX-LABEL: f8: +; AVX-LABEL: fdiv2: ; AVX: # %bb.0: ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; -; X87-LABEL: f8: +; X87-LABEL: fdiv2: ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fdivs {{[0-9]+}}(%esp) @@ -192,4 +196,123 @@ ret float %ret } +define x86_regcallcc void @fpext1(float* %val, double* %ret) #0 { +; SSE-LABEL: fpext1: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cvtss2sd %xmm0, %xmm0 +; SSE-NEXT: movsd %xmm0, (%{{[r|e]}}cx) +; SSE-NEXT: ret{{q|l}} +; +; AVX-LABEL: fpext1: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd %xmm0, (%{{[r|e]}}cx) +; AVX-NEXT: ret{{q|l}} +; +; X87-LABEL: fpext1: +; X87: # %bb.0: +; X87-NEXT: flds (%eax) +; X87-NEXT: fstpl (%ecx) +; X87-NEXT: retl + %1 = load float, float* %val, align 4 + %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1, + metadata !"fpexcept.strict") #0 + store double %res, 
double* %ret, align 8 + ret void +} + +define x86_regcallcc void @fptrunc1(double* %val, float *%ret) #0 { +; SSE-LABEL: fptrunc1: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: cvtsd2ss %xmm0, %xmm0 +; SSE-NEXT: movss %xmm0, (%{{r|e}}cx) +; SSE-NEXT: ret{{q|l}} +; +; AVX-LABEL: fptrunc1: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss %xmm0, (%{{r|e}}cx) +; AVX-NEXT: ret{{q|l}} +; +; X87-LABEL: fptrunc1: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: fldl (%eax) +; X87-NEXT: fstps (%esp) +; X87-NEXT: flds (%esp) +; X87-NEXT: fstps (%ecx) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %1 = load double, double* %val, align 8 + %res = call float @llvm.experimental.constrained.fptrunc.f64.f32(double %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %res, float* %ret, align 4 + ret void +} + +define x86_regcallcc void @fsqrt1(float* %a) #0 { +; SSE-LABEL: fsqrt1: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: sqrtss %xmm0, %xmm0 +; SSE-NEXT: movss %xmm0, (%{{r|e}}ax) +; SSE-NEXT: ret{{q|l}} +; +; AVX-LABEL: fsqrt1: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss %xmm0, (%{{r|e}}ax) +; AVX-NEXT: ret{{q|l}} +; +; X87-LABEL: fsqrt1: +; X87: # %bb.0: +; X87-NEXT: flds (%eax) +; X87-NEXT: fsqrt +; X87-NEXT: fstps (%eax) +; X87-NEXT: retl + %1 = load float, float* %a, align 4 + %res = call float @llvm.experimental.constrained.sqrt.f32(float %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %res, float* %a, align 4 + ret void +} + +define x86_regcallcc void @fsqrt2(double* %a) #0 { +; SSE-LABEL: fsqrt2: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: sqrtsd %xmm0, %xmm0 +; SSE-NEXT: movsd %xmm0, (%{{r|e}}ax) +; SSE-NEXT: ret{{q|l}} +; +; AVX-LABEL: fsqrt2: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd %xmm0, (%{{r|e}}ax) +; AVX-NEXT: ret{{q|l}} +; +; X87-LABEL: fsqrt2: +; X87: # %bb.0: +; X87-NEXT: fldl (%eax) +; X87-NEXT: fsqrt +; X87-NEXT: fstpl (%eax) +; X87-NEXT: retl + %1 = load double, double* %a, align 8 + %res = call double @llvm.experimental.constrained.sqrt.f64(double %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store double %res, double* %a, align 8 + ret void +} + attributes #0 = { strictfp } Index: llvm/test/CodeGen/X86/fp80-strict-scalar.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -0,0 +1,465 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f 
-mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87 + + + +declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float, metadata) +declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double, metadata) +declare x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80, metadata, metadata) + +define x86_regcallcc x86_fp80 @fadd1(x86_fp80 %a, x86_fp80 %b) #0 { +; SSE-X86-LABEL: fadd1: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-X86-NEXT: faddp %st, %st(1) +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fadd1: +; SSE-64: # %bb.0: +; SSE-64-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-64-NEXT: faddp %st, %st(1) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fadd1: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: fldt {{[0-9]+}}(%esp) +; AVX-X86-NEXT: faddp %st, %st(1) +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fadd1: +; AVX-64: # %bb.0: +; AVX-64-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-64-NEXT: faddp %st, %st(1) +; AVX-64-NEXT: retq +; +; X87-LABEL: fadd1: +; X87: # %bb.0: +; X87-NEXT: fldt {{[0-9]+}}(%esp) +; X87-NEXT: faddp %st, %st(1) +; X87-NEXT: retl + %ret = call x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define x86_regcallcc x86_fp80 @fsub1(x86_fp80 %a, x86_fp80 %b) #0 { +; SSE-X86-LABEL: fsub1: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fsubrp %st, %st(1) +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fsub1: +; SSE-64: # %bb.0: +; SSE-64-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-64-NEXT: fsubrp %st, %st(1) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fsub1: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: fldt {{[0-9]+}}(%esp) +; AVX-X86-NEXT: fsubrp %st, %st(1) +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fsub1: +; AVX-64: # %bb.0: +; AVX-64-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-64-NEXT: fsubrp %st, %st(1) +; AVX-64-NEXT: retq +; +; X87-LABEL: fsub1: +; X87: # %bb.0: +; X87-NEXT: fldt {{[0-9]+}}(%esp) +; X87-NEXT: fsubrp %st, %st(1) +; X87-NEXT: retl + %ret = call x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define x86_regcallcc x86_fp80 @fmul1(x86_fp80 %a, x86_fp80 %b) #0 { +; SSE-X86-LABEL: fmul1: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fmulp %st, %st(1) +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fmul1: +; SSE-64: # %bb.0: +; SSE-64-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-64-NEXT: fmulp %st, %st(1) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fmul1: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: fldt {{[0-9]+}}(%esp) +; AVX-X86-NEXT: fmulp %st, %st(1) +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fmul1: +; AVX-64: # %bb.0: +; AVX-64-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-64-NEXT: fmulp %st, 
%st(1) +; AVX-64-NEXT: retq +; +; X87-LABEL: fmul1: +; X87: # %bb.0: +; X87-NEXT: fldt {{[0-9]+}}(%esp) +; X87-NEXT: fmulp %st, %st(1) +; X87-NEXT: retl + %ret = call x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define x86_regcallcc x86_fp80 @fdiv1(x86_fp80 %a, x86_fp80 %b) #0 { +; SSE-X86-LABEL: fdiv1: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fdivrp %st, %st(1) +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fdiv1: +; SSE-64: # %bb.0: +; SSE-64-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-64-NEXT: fdivrp %st, %st(1) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fdiv1: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: fldt {{[0-9]+}}(%esp) +; AVX-X86-NEXT: fdivrp %st, %st(1) +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fdiv1: +; AVX-64: # %bb.0: +; AVX-64-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-64-NEXT: fdivrp %st, %st(1) +; AVX-64-NEXT: retq +; +; X87-LABEL: fdiv1: +; X87: # %bb.0: +; X87-NEXT: fldt {{[0-9]+}}(%esp) +; X87-NEXT: fdivrp %st, %st(1) +; X87-NEXT: retl + %ret = call x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define x86_regcallcc void @fpext1(float* %val, x86_fp80* %ret) #0 { +; SSE-X86-LABEL: fpext1: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: fstpt (%ecx) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fpext1: +; SSE-64: # %bb.0: +; SSE-64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; SSE-64-NEXT: flds -{{[0-9]+}}(%rsp) +; SSE-64-NEXT: fstpt (%rcx) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fpext1: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: fstpt (%ecx) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fpext1: +; AVX-64: # %bb.0: +; AVX-64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-64-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; AVX-64-NEXT: flds -{{[0-9]+}}(%rsp) +; AVX-64-NEXT: fstpt (%rcx) +; AVX-64-NEXT: retq +; +; X87-LABEL: fpext1: +; X87: # %bb.0: +; X87-NEXT: flds (%eax) +; X87-NEXT: fstpt (%ecx) +; X87-NEXT: retl + %1 = load float, float* %val, align 4 + %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %1, + metadata !"fpexcept.strict") #0 + store x86_fp80 %res, x86_fp80* %ret, align 16 + ret void +} + +define x86_regcallcc void @fpext2(double* %val, x86_fp80* %ret) #0 { +; SSE-X86-LABEL: fpext2: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X86-NEXT: movsd %xmm0, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: fstpt (%ecx) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fpext2: 
+; SSE-64: # %bb.0: +; SSE-64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; SSE-64-NEXT: fldl -{{[0-9]+}}(%rsp) +; SSE-64-NEXT: fstpt (%rcx) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fpext2: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: fstpt (%ecx) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fpext2: +; AVX-64: # %bb.0: +; AVX-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-64-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; AVX-64-NEXT: fldl -{{[0-9]+}}(%rsp) +; AVX-64-NEXT: fstpt (%rcx) +; AVX-64-NEXT: retq +; +; X87-LABEL: fpext2: +; X87: # %bb.0: +; X87-NEXT: fldl (%eax) +; X87-NEXT: fstpt (%ecx) +; X87-NEXT: retl + %1 = load double, double* %val, align 8 + %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %1, + metadata !"fpexcept.strict") #0 + store x86_fp80 %res, x86_fp80* %ret, align 16 + ret void +} + +define x86_regcallcc void @fptrunc1(x86_fp80* %val, float *%ret) #0 { +; SSE-X86-LABEL: fptrunc1: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: fldt (%eax) +; SSE-X86-NEXT: fstps (%esp) +; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-X86-NEXT: movss %xmm0, (%ecx) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fptrunc1: +; SSE-64: # %bb.0: +; SSE-64-NEXT: fldt (%rax) +; SSE-64-NEXT: fstps -{{[0-9]+}}(%rsp) +; SSE-64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-64-NEXT: movss %xmm0, (%rcx) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fptrunc1: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: fldt (%eax) +; AVX-X86-NEXT: fstps (%esp) +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vmovss %xmm0, (%ecx) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fptrunc1: +; AVX-64: # %bb.0: +; AVX-64-NEXT: fldt (%rax) +; AVX-64-NEXT: fstps -{{[0-9]+}}(%rsp) +; AVX-64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-64-NEXT: vmovss %xmm0, (%rcx) +; AVX-64-NEXT: retq +; +; X87-LABEL: fptrunc1: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: fldt (%eax) +; X87-NEXT: fstps (%esp) +; X87-NEXT: flds (%esp) +; X87-NEXT: fstps (%ecx) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %1 = load x86_fp80, x86_fp80* %val, align 16 + %res = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %res, float* %ret, align 4 + ret void +} + +define x86_regcallcc void @fptrunc2(x86_fp80* %val, double* %ret) #0 { +; SSE-X86-LABEL: fptrunc2: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: fldt (%eax) +; 
SSE-X86-NEXT: fstpl (%esp) +; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X86-NEXT: movsd %xmm0, (%ecx) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fptrunc2: +; SSE-64: # %bb.0: +; SSE-64-NEXT: fldt (%rax) +; SSE-64-NEXT: fstpl -{{[0-9]+}}(%rsp) +; SSE-64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-64-NEXT: movsd %xmm0, (%rcx) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fptrunc2: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: fldt (%eax) +; AVX-X86-NEXT: fstpl (%esp) +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vmovsd %xmm0, (%ecx) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fptrunc2: +; AVX-64: # %bb.0: +; AVX-64-NEXT: fldt (%rax) +; AVX-64-NEXT: fstpl -{{[0-9]+}}(%rsp) +; AVX-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-64-NEXT: vmovsd %xmm0, (%rcx) +; AVX-64-NEXT: retq +; +; X87-LABEL: fptrunc2: +; X87: # %bb.0: +; X87-NEXT: pushl %ebp +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: .cfi_offset %ebp, -8 +; X87-NEXT: movl %esp, %ebp +; X87-NEXT: .cfi_def_cfa_register %ebp +; X87-NEXT: andl $-8, %esp +; X87-NEXT: subl $8, %esp +; X87-NEXT: fldt (%eax) +; X87-NEXT: fstpl (%esp) +; X87-NEXT: fldl (%esp) +; X87-NEXT: fstpl (%ecx) +; X87-NEXT: movl %ebp, %esp +; X87-NEXT: popl %ebp +; X87-NEXT: .cfi_def_cfa %esp, 4 +; X87-NEXT: retl + %1 = load x86_fp80, x86_fp80* %val, align 16 + %res = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store double %res, double* %ret, align 8 + ret void +} + +define x86_regcallcc void @fsqrt1(x86_fp80* %a) #0 { +; SSE-X86-LABEL: fsqrt1: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: fldt (%eax) +; SSE-X86-NEXT: fsqrt +; SSE-X86-NEXT: fstpt (%eax) +; SSE-X86-NEXT: retl +; +; SSE-64-LABEL: fsqrt1: +; SSE-64: # %bb.0: +; SSE-64-NEXT: fldt (%rax) +; SSE-64-NEXT: fsqrt +; SSE-64-NEXT: fstpt (%rax) +; SSE-64-NEXT: retq +; +; AVX-X86-LABEL: fsqrt1: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: fldt (%eax) +; AVX-X86-NEXT: fsqrt +; AVX-X86-NEXT: fstpt (%eax) +; AVX-X86-NEXT: retl +; +; AVX-64-LABEL: fsqrt1: +; AVX-64: # %bb.0: +; AVX-64-NEXT: fldt (%rax) +; AVX-64-NEXT: fsqrt +; AVX-64-NEXT: fstpt (%rax) +; AVX-64-NEXT: retq +; +; X87-LABEL: fsqrt1: +; X87: # %bb.0: +; X87-NEXT: fldt (%eax) +; X87-NEXT: fsqrt +; X87-NEXT: fstpt (%eax) +; X87-NEXT: retl + %1 = load x86_fp80, x86_fp80* %a, align 16 + %res = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store x86_fp80 %res, x86_fp80* %a, align 16 + ret void +} + +attributes #0 = { strictfp }