Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -597,6 +597,17 @@
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
     }
+
+    // Handle constrained floating-point operations of scalar types.
+    for (auto VT : { MVT::f32, MVT::f64, MVT::f80 }) {
+      setOperationAction(ISD::STRICT_FADD     , VT, Legal);
+      setOperationAction(ISD::STRICT_FSUB     , VT, Legal);
+      setOperationAction(ISD::STRICT_FMUL     , VT, Legal);
+      setOperationAction(ISD::STRICT_FDIV     , VT, Legal);
+      setOperationAction(ISD::STRICT_FSQRT    , VT, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND , VT, Legal);
+      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
+    }
   }
 
   // Expand FP32 immediates into loads from the stack, save special cases.
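Note: marking the STRICT_* opcodes Legal above keeps constrained FP operations as strict SelectionDAG nodes through legalization, so they reach the x87 patterns instead of being mutated into their non-strict counterparts. The X86InstrFPStack.td hunks below accordingly switch the patterns from the plain DAG nodes (fadd, fsqrt, fpextend, fpround, ...) to the corresponding any_* PatFrags, which match either the strict or the non-strict form of a node. A rough sketch of such a fragment, paraphrasing the definitions in llvm/include/llvm/Target/TargetSelectionDAG.td (not part of this patch):

  def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
                          [(strict_fadd node:$lhs, node:$rhs),
                           (fadd node:$lhs, node:$rhs)]>;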
Index: llvm/lib/Target/X86/X86InstrFPStack.td
===================================================================
--- llvm/lib/Target/X86/X86InstrFPStack.td
+++ llvm/lib/Target/X86/X86InstrFPStack.td
@@ -286,26 +286,26 @@
 // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
 // resources.
 let hasNoSchedulingInfo = 1 in {
-defm ADD : FPBinary_rr<fadd>;
-defm SUB : FPBinary_rr<fsub>;
-defm MUL : FPBinary_rr<fmul>;
-defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary_rr<any_fadd>;
+defm SUB : FPBinary_rr<any_fsub>;
+defm MUL : FPBinary_rr<any_fmul>;
+defm DIV : FPBinary_rr<any_fdiv>;
 }
 
 // Sets the scheduling resources for the actual NAME#_F<size>m definitions.
 let SchedRW = [WriteFAddLd] in {
-defm ADD : FPBinary<fadd, MRM0m, "add">;
-defm SUB : FPBinary<fsub, MRM4m, "sub">;
-defm SUBR: FPBinary<fsub, MRM5m, "subr", 0>;
+defm ADD : FPBinary<any_fadd, MRM0m, "add">;
+defm SUB : FPBinary<any_fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<any_fsub, MRM5m, "subr", 0>;
 }
 
 let SchedRW = [WriteFMulLd] in {
-defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm MUL : FPBinary<any_fmul, MRM1m, "mul">;
 }
 
 let SchedRW = [WriteFDivLd] in {
-defm DIV : FPBinary<fdiv, MRM6m, "div">;
-defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
+defm DIV : FPBinary<any_fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<any_fdiv, MRM7m, "divr", 0>;
 }
 } // Uses = [FPCW], mayRaiseFPException = 1
@@ -366,11 +366,11 @@
 let Uses = [FPCW], mayRaiseFPException = 1 in {
 let SchedRW = [WriteFSqrt80] in
-defm SQRT: FPUnary<fsqrt, MRM_FA, "fsqrt">;
+defm SQRT: FPUnary<any_fsqrt, MRM_FA, "fsqrt">;
 
 let SchedRW = [WriteMicrocoded] in {
-defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
-defm COS : FPUnary<fcos, MRM_FF, "fcos">;
+defm SIN : FPUnary<any_fsin, MRM_FE, "fsin">;
+defm COS : FPUnary<any_fcos, MRM_FF, "fcos">;
 }
 
 let SchedRW = [WriteFCom] in {
@@ -793,19 +793,19 @@
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
-def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
           Requires<[FPStackf64]>;
 
 // FP truncations map onto simple pseudo-value conversions if they are to/from
 // the FP stack. We have validated that only value-preserving truncations make
 // it through isel.
-def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+def : Pat<(f32 (any_fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
          Requires<[FPStackf32]>;
-def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+def : Pat<(f32 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
          Requires<[FPStackf32]>;
-def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+def : Pat<(f64 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
          Requires<[FPStackf64]>;
Index: llvm/test/CodeGen/X86/x87-fp-strict-add.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/x87-fp-strict-add.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
+
+define void @f1(float* %a, float* %b) #0 {
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    flds (%rdi)
+; X64-NEXT:    fadds (%rsi)
+; X64-NEXT:    fstps (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    flds (%ecx)
+; X86-NEXT:    fadds (%eax)
+; X86-NEXT:    fstps (%ecx)
+; X86-NEXT:    retl
+  %1 = load float, float* %a, align 4, !tbaa !0
+  %2 = load float, float* %b, align 4, !tbaa !0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %1, float %2,
+                     metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store float %add, float* %a, align 4, !tbaa !0
+  ret void
+}
+
+define void @f2(double* %a, double* %b) #0 {
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    fldl (%rdi)
+; X64-NEXT:    faddl (%rsi)
+; X64-NEXT:    fstpl (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldl (%ecx)
+; X86-NEXT:    faddl (%eax)
+; X86-NEXT:    fstpl (%ecx)
+; X86-NEXT:    retl
+  %1 = load double, double* %a, align 8, !tbaa !4
+  %2 = load double, double* %b, align 8, !tbaa !4
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %1, double %2,
+                      metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store double %add, double* %a, align 8, !tbaa !4
+  ret void
+}
+
+define void @f3(x86_fp80* %a, x86_fp80* %b) #0 {
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fldt (%rsi)
+; X64-NEXT:    faddp %st, %st(1)
+; X64-NEXT:    fstpt (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fldt (%eax)
+; X86-NEXT:    faddp %st, %st(1)
+; X86-NEXT:    fstpt (%ecx)
+; X86-NEXT:    retl
+  %1 = load x86_fp80, x86_fp80* %a, align 16, !tbaa !6
+  %2 = load x86_fp80, x86_fp80* %b, align 16, !tbaa !6
+  %add = call x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80 %1, x86_fp80 %2,
+                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store x86_fp80 %add, x86_fp80* %a, align 16, !tbaa !6
+  ret void
+}
+
+attributes #0 = { strictfp }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"long double", !2, i64 0}
Index: llvm/test/CodeGen/X86/x87-fp-strict-div.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/x87-fp-strict-div.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
+
+define void @f1(float* %a, float* %b) #0 {
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    flds (%rdi)
+; X64-NEXT:    fdivs (%rsi)
+; X64-NEXT:    fstps (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    flds (%ecx)
+; X86-NEXT:    fdivs (%eax)
+; X86-NEXT:    fstps (%ecx)
+; X86-NEXT:    retl
+  %1 = load float, float* %a, align 4, !tbaa !0
+  %2 = load float, float* %b, align 4, !tbaa !0
+  %add = call float @llvm.experimental.constrained.fdiv.f32(float %1, float %2,
+                     metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store float %add, float* %a, align 4, !tbaa !0
+  ret void
+}
+
+define void @f2(double* %a, double* %b) #0 {
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    fldl (%rdi)
+; X64-NEXT:    fdivl (%rsi)
+; X64-NEXT:    fstpl (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldl (%ecx)
+; X86-NEXT:    fdivl (%eax)
+; X86-NEXT:    fstpl (%ecx)
+; X86-NEXT:    retl
+  %1 = load double, double* %a, align 8, !tbaa !4
+  %2 = load double, double* %b, align 8, !tbaa !4
+  %add = call double @llvm.experimental.constrained.fdiv.f64(double %1, double %2,
+                      metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store double %add, double* %a, align 8, !tbaa !4
+  ret void
+}
+
+define void @f3(x86_fp80* %a, x86_fp80* %b) #0 {
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fldt (%rsi)
+; X64-NEXT:    fdivrp %st, %st(1)
+; X64-NEXT:    fstpt (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fldt (%eax)
+; X86-NEXT:    fdivrp %st, %st(1)
+; X86-NEXT:    fstpt (%ecx)
+; X86-NEXT:    retl
+  %1 = load x86_fp80, x86_fp80* %a, align 16, !tbaa !6
+  %2 = load x86_fp80, x86_fp80* %b, align 16, !tbaa !6
+  %add = call x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80 %1, x86_fp80 %2,
+                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store x86_fp80 %add, x86_fp80* %a, align 16, !tbaa !6
+  ret void
+}
+
+attributes #0 = { strictfp }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"long double", !2, i64 0}
Index: llvm/test/CodeGen/X86/x87-fp-strict-fpextend.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/x87-fp-strict-fpextend.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
+
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double, metadata)
+
+define void @f1(float* %val, double* %ret) #0 {
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    flds (%rdi)
+; X64-NEXT:    fstpl (%rsi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    flds (%ecx)
+; X86-NEXT:    fstpl (%eax)
+; X86-NEXT:    retl
+  %1 = load float, float* %val, align 4, !tbaa !0
+  %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1,
+                      metadata !"fpexcept.strict") #0
+  store double %res, double* %ret, align 8, !tbaa !4
+  ret void
+}
+
+define void @f2(float* %val, x86_fp80* %ret) #0 {
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    flds (%rdi)
+; X64-NEXT:    fstpt (%rsi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    flds (%ecx)
+; X86-NEXT:    fstpt (%eax)
+; X86-NEXT:    retl
+  %1 = load float, float* %val, align 4, !tbaa !0
+  %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %1,
+                        metadata !"fpexcept.strict") #0
+  store x86_fp80 %res, x86_fp80* %ret, align 16, !tbaa !6
+  ret void
+}
+
+define void @f3(double* %val, x86_fp80* %ret) #0 {
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    fldl (%rdi)
+; X64-NEXT:    fstpt (%rsi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldl (%ecx)
+; X86-NEXT:    fstpt (%eax)
+; X86-NEXT:    retl
+  %1 = load double, double* %val, align 8, !tbaa !4
+  %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %1,
+                        metadata !"fpexcept.strict") #0
+  store x86_fp80 %res, x86_fp80* %ret, align 16, !tbaa !4
+  ret void
+}
+
+attributes #0 = { strictfp }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"long double", !2, i64 0}
Index: llvm/test/CodeGen/X86/x87-fp-strict-fpround.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/x87-fp-strict-fpround.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
+
+declare float @llvm.experimental.constrained.fptrunc.f64.f32(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80, metadata, metadata)
+
+define void @f1(double* %val, float *%ret) #0 {
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    fldl (%rdi)
+; X64-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; X64-NEXT:    flds -{{[0-9]+}}(%rsp)
+; X64-NEXT:    fstps (%rsi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldl (%ecx)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    fstps (%eax)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+  %1 = load double, double* %val, align 8, !tbaa !4
+  %res = call float @llvm.experimental.constrained.fptrunc.f64.f32(
+                        double %1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store float %res, float* %ret, align 4, !tbaa !0
+  ret void
+}
+
+define void @f2(x86_fp80* %val, float *%ret) #0 {
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; X64-NEXT:    flds -{{[0-9]+}}(%rsp)
+; X64-NEXT:    fstps (%rsi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    fstps (%eax)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+  %1 = load x86_fp80, x86_fp80* %val, align 16, !tbaa !6
+  %res = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(
+                        x86_fp80 %1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store float %res, float* %ret, align 4, !tbaa !0
+  ret void
+}
+
+define void @f3(x86_fp80* %val, double* %ret) #0 {
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; X64-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; X64-NEXT:    fstpl (%rsi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl 8(%ebp), %ecx
+; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fstpl (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    fstpl (%eax)
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+  %1 = load x86_fp80, x86_fp80* %val, align 16, !tbaa !6
+  %res = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(
+                        x86_fp80 %1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store double %res, double* %ret, align 8, !tbaa !4
+  ret void
+}
+
+attributes #0 = { strictfp }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"long double", !2, i64 0}
Index: llvm/test/CodeGen/X86/x87-fp-strict-mul.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/x87-fp-strict-mul.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
+
+define void @f1(float* %a, float* %b) #0 {
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    flds (%rdi)
+; X64-NEXT:    fmuls (%rsi)
+; X64-NEXT:    fstps (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    flds (%ecx)
+; X86-NEXT:    fmuls (%eax)
+; X86-NEXT:    fstps (%ecx)
+; X86-NEXT:    retl
+  %1 = load float, float* %a, align 4, !tbaa !0
+  %2 = load float, float* %b, align 4, !tbaa !0
+  %add = call float @llvm.experimental.constrained.fmul.f32(float %1, float %2,
+                     metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store float %add, float* %a, align 4, !tbaa !0
+  ret void
+}
+
+define void @f2(double* %a, double* %b) #0 {
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    fldl (%rdi)
+; X64-NEXT:    fmull (%rsi)
+; X64-NEXT:    fstpl (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldl (%ecx)
+; X86-NEXT:    fmull (%eax)
+; X86-NEXT:    fstpl (%ecx)
+; X86-NEXT:    retl
+  %1 = load double, double* %a, align 8, !tbaa !4
+  %2 = load double, double* %b, align 8, !tbaa !4
+  %add = call double @llvm.experimental.constrained.fmul.f64(double %1, double %2,
+                      metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store double %add, double* %a, align 8, !tbaa !4
+  ret void
+}
+
+define void @f3(x86_fp80* %a, x86_fp80* %b) #0 {
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fldt (%rsi)
+; X64-NEXT:    fmulp %st, %st(1)
+; X64-NEXT:    fstpt (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fldt (%eax)
+; X86-NEXT:    fmulp %st, %st(1)
+; X86-NEXT:    fstpt (%ecx)
+; X86-NEXT:    retl
+  %1 = load x86_fp80, x86_fp80* %a, align 16, !tbaa !6
+  %2 = load x86_fp80, x86_fp80* %b, align 16, !tbaa !6
+  %add = call x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80 %1, x86_fp80 %2,
+                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store x86_fp80 %add, x86_fp80* %a, align 16, !tbaa !6
+  ret void
+}
+
+attributes #0 = { strictfp }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"long double", !2, i64 0}
Index: llvm/test/CodeGen/X86/x87-fp-strict-sqrt.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/x87-fp-strict-sqrt.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80, metadata, metadata)
+; Check register square root.
+define void @f1(float* %a) #0 {
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    flds (%rdi)
+; X64-NEXT:    fsqrt
+; X64-NEXT:    fstps (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    flds (%eax)
+; X86-NEXT:    fsqrt
+; X86-NEXT:    fstps (%eax)
+; X86-NEXT:    retl
+  %1 = load float, float* %a, align 4, !tbaa !0
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store float %res, float* %a, align 4, !tbaa !0
+  ret void
+}
+
+define void @f2(double* %a) #0 {
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    fldl (%rdi)
+; X64-NEXT:    fsqrt
+; X64-NEXT:    fstpl (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    fldl (%eax)
+; X86-NEXT:    fsqrt
+; X86-NEXT:    fstpl (%eax)
+; X86-NEXT:    retl
+  %1 = load double, double* %a, align 8, !tbaa !4
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store double %res, double* %a, align 8, !tbaa !4
+  ret void
+}
+
+define void @f3(x86_fp80* %a) #0 {
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fsqrt
+; X64-NEXT:    fstpt (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    fldt (%eax)
+; X86-NEXT:    fsqrt
+; X86-NEXT:    fstpt (%eax)
+; X86-NEXT:    retl
+  %1 = load x86_fp80, x86_fp80* %a, align 16, !tbaa !6
+  %res = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(
+                        x86_fp80 %1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store x86_fp80 %res, x86_fp80* %a, align 16, !tbaa !6
+  ret void
+}
+
+attributes #0 = { strictfp }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"long double", !2, i64 0}
+
Index: llvm/test/CodeGen/X86/x87-fp-strict-sub.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/x87-fp-strict-sub.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+x87 -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
+
+define void @f1(float* %a, float* %b) #0 {
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    flds (%rdi)
+; X64-NEXT:    fsubs (%rsi)
+; X64-NEXT:    fstps (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    flds (%ecx)
+; X86-NEXT:    fsubs (%eax)
+; X86-NEXT:    fstps (%ecx)
+; X86-NEXT:    retl
+  %1 = load float, float* %a, align 4, !tbaa !0
+  %2 = load float, float* %b, align 4, !tbaa !0
+  %add = call float @llvm.experimental.constrained.fsub.f32(float %1, float %2,
+                     metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store float %add, float* %a, align 4, !tbaa !0
+  ret void
+}
+
+define void @f2(double* %a, double* %b) #0 {
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    fldl (%rdi)
+; X64-NEXT:    fsubl (%rsi)
+; X64-NEXT:    fstpl (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldl (%ecx)
+; X86-NEXT:    fsubl (%eax)
+; X86-NEXT:    fstpl (%ecx)
+; X86-NEXT:    retl
+  %1 = load double, double* %a, align 8, !tbaa !4
+  %2 = load double, double* %b, align 8, !tbaa !4
+  %add = call double @llvm.experimental.constrained.fsub.f64(double %1, double %2,
+                      metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store double %add, double* %a, align 8, !tbaa !4
+  ret void
+}
+
+define void @f3(x86_fp80* %a, x86_fp80* %b) #0 {
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fldt (%rsi)
+; X64-NEXT:    fsubrp %st, %st(1)
+; X64-NEXT:    fstpt (%rdi)
+; X64-NEXT:    retq
+;
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fldt (%eax)
+; X86-NEXT:    fsubrp %st, %st(1)
+; X86-NEXT:    fstpt (%ecx)
+; X86-NEXT:    retl
+  %1 = load x86_fp80, x86_fp80* %a, align 16, !tbaa !6
+  %2 = load x86_fp80, x86_fp80* %b, align 16, !tbaa !6
+  %add = call x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80 %1, x86_fp80 %2,
+                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  store x86_fp80 %add, x86_fp80* %a, align 16, !tbaa !6
+  ret void
+}
+
+attributes #0 = { strictfp }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"long double", !2, i64 0}
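Note on the expected assembly: for the x86_fp80 cases there is no memory-operand form of the x87 arithmetic instructions, so both operands are loaded onto the FP stack and the register-register popping forms (faddp, fsubrp, fmulp, fdivrp) are selected, whereas the f32/f64 cases fold the second operand into a memory operand (fadds/faddl and friends). The strict fptrunc tests round through a stack temporary (fstps/fstpl followed by a reload) because an x87 register always holds full 80-bit precision; the any_fpround COPY_TO_REGCLASS patterns in X86InstrFPStack.td only cover truncations already known to be value-preserving, as the comment there notes.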