Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5222,9 +5222,14 @@
   }
   case ISD::STRICT_FADD:
   case ISD::STRICT_FSUB:
+  case ISD::STRICT_FP_ROUND:
+    // X87 instructions can handle these strict FP operations.
+    if (Node->getSimpleValueType(0) == MVT::f80 ||
+        (!Subtarget->hasSSE1() && Subtarget->hasX87()))
+      break;
+    LLVM_FALLTHROUGH;
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
-  case ISD::STRICT_FP_ROUND:
     // FIXME: Remove when we have isel patterns for strict versions of these
     // nodes.
     CurDAG->mutateStrictFPToFP(Node);
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -587,6 +587,12 @@
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
+
+      // Handle constrained floating-point operations of scalars.
+      setOperationAction(ISD::STRICT_FMUL     , VT, Legal);
+      setOperationAction(ISD::STRICT_FDIV     , VT, Legal);
+      setOperationAction(ISD::STRICT_FSQRT    , VT, Legal);
+      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
     }
   }

@@ -657,6 +663,14 @@
     setOperationAction(ISD::LLROUND, MVT::f80, Expand);
     setOperationAction(ISD::LRINT, MVT::f80, Expand);
     setOperationAction(ISD::LLRINT, MVT::f80, Expand);
+
+    // Handle constrained floating-point operations of scalars.
+    setOperationAction(ISD::STRICT_FADD     , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FSUB     , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FMUL     , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FDIV     , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FSQRT    , MVT::f80, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
   }

   // f128 uses xmm registers, but most operations require libcalls.
Index: llvm/lib/Target/X86/X86InstrFPStack.td
===================================================================
--- llvm/lib/Target/X86/X86InstrFPStack.td
+++ llvm/lib/Target/X86/X86InstrFPStack.td
@@ -286,26 +286,26 @@
 // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
 // resources.
 let hasNoSchedulingInfo = 1 in {
-defm ADD : FPBinary_rr<fadd>;
-defm SUB : FPBinary_rr<fsub>;
-defm MUL : FPBinary_rr<fmul>;
-defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary_rr<any_fadd>;
+defm SUB : FPBinary_rr<any_fsub>;
+defm MUL : FPBinary_rr<any_fmul>;
+defm DIV : FPBinary_rr<any_fdiv>;
 }

 // Sets the scheduling resources for the actual NAME#_Fm defintions.
 let SchedRW = [WriteFAddLd] in {
-defm ADD : FPBinary<fadd, MRM0m, "add">;
-defm SUB : FPBinary<fsub, MRM4m, "sub">;
-defm SUBR: FPBinary<fsub, MRM5m, "subr", 0>;
+defm ADD : FPBinary<any_fadd, MRM0m, "add">;
+defm SUB : FPBinary<any_fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<any_fsub, MRM5m, "subr", 0>;
 }

 let SchedRW = [WriteFMulLd] in {
-defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm MUL : FPBinary<any_fmul, MRM1m, "mul">;
 }

 let SchedRW = [WriteFDivLd] in {
-defm DIV : FPBinary<fdiv, MRM6m, "div">;
-defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
+defm DIV : FPBinary<any_fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<any_fdiv, MRM7m, "divr", 0>;
 }
 } // Uses = [FPCW], mayRaiseFPException = 1

@@ -366,7 +366,7 @@

 let Uses = [FPCW], mayRaiseFPException = 1 in {
 let SchedRW = [WriteFSqrt80] in
-defm SQRT: FPUnary<fsqrt, MRM_FA, "fsqrt">;
+defm SQRT: FPUnary<any_fsqrt, MRM_FA, "fsqrt">;

 let SchedRW = [WriteFCom] in {
 let hasSideEffects = 0 in {
@@ -790,19 +790,19 @@

 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
-def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
           Requires<[FPStackf64]>;

 // FP truncations map onto simple pseudo-value conversions if they are to/from
 // the FP stack. We have validated that only value-preserving truncations make
 // it through isel.
-def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+def : Pat<(f32 (any_fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
           Requires<[FPStackf32]>;
-def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+def : Pat<(f32 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
           Requires<[FPStackf32]>;
-def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+def : Pat<(f64 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
           Requires<[FPStackf64]>;
Index: llvm/test/CodeGen/X86/fp-strict-scalar.ll
===================================================================
--- llvm/test/CodeGen/X86/fp-strict-scalar.ll
+++ llvm/test/CodeGen/X86/fp-strict-scalar.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X87

 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
@@ -70,8 +70,8 @@
   ret double %ret
 }

-define float @fadd_fsub_f32(float %a, float %b) nounwind strictfp {
-; SSE-X86-LABEL: fadd_fsub_f32:
+define float @fadd_f32(float %a, float %b) nounwind strictfp {
+; SSE-X86-LABEL: fadd_f32:
 ; SSE-X86:       # %bb.0:
 ; SSE-X86-NEXT:    pushl %eax
 ; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -81,12 +81,12 @@
 ; SSE-X86-NEXT:    popl %eax
 ; SSE-X86-NEXT:    retl
 ;
-; SSE-X64-LABEL: fadd_fsub_f32:
+; SSE-X64-LABEL: fadd_f32:
 ; SSE-X64:       # %bb.0:
 ; SSE-X64-NEXT:    addss %xmm1, %xmm0
 ; SSE-X64-NEXT:    retq
 ;
-; AVX-X86-LABEL: fadd_fsub_f32:
+; AVX-X86-LABEL: fadd_f32:
 ; AVX-X86:       # %bb.0:
 ; AVX-X86-NEXT:    pushl %eax
 ; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -96,12 +96,12 @@
 ; AVX-X86-NEXT:    popl %eax
 ; AVX-X86-NEXT:    retl
 ;
-; AVX-X64-LABEL: fadd_fsub_f32:
+; AVX-X64-LABEL: fadd_f32:
 ; AVX-X64:       # %bb.0:
 ; AVX-X64-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX-X64-NEXT:    retq
 ;
-; X87-LABEL: fadd_fsub_f32:
+; X87-LABEL: fadd_f32:
 ; X87:       # %bb.0:
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fadds {{[0-9]+}}(%esp)
Index: llvm/test/CodeGen/X86/fp80-strict-scalar.ll
===================================================================
--- llvm/test/CodeGen/X86/fp80-strict-scalar.ll
+++ llvm/test/CodeGen/X86/fp80-strict-scalar.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X64

 declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
 declare x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
@@ -92,129 +92,102 @@
   ret x86_fp80 %ret
 }

-define void @fpext_f32_to_fp80(float* %val, x86_fp80* %ret) nounwind strictfp {
+define x86_fp80 @fpext_f32_to_fp80(float %a) nounwind strictfp {
 ; X86-LABEL: fpext_f32_to_fp80:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    flds (%ecx)
-; X86-NEXT:    fstpt (%eax)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fpext_f32_to_fp80:
 ; X64:       # %bb.0:
-; X64-NEXT:    flds (%rdi)
-; X64-NEXT:    fstpt (%rsi)
+; X64-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    flds -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    retq
-  %1 = load float, float* %val, align 4
-  %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %1,
+  %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %a,
                                               metadata !"fpexcept.strict") #0
-  store x86_fp80 %res, x86_fp80* %ret, align 16
-  ret void
+  ret x86_fp80 %ret
 }

-define void @fpext_f64_to_fp80(double* %val, x86_fp80* %ret) nounwind strictfp {
+define x86_fp80 @fpext_f64_to_fp80(double %a) nounwind strictfp {
 ; X86-LABEL: fpext_f64_to_fp80:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    fldl (%ecx)
-; X86-NEXT:    fstpt (%eax)
+; X86-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fpext_f64_to_fp80:
 ; X64:       # %bb.0:
-; X64-NEXT:    fldl (%rdi)
-; X64-NEXT:    fstpt (%rsi)
+; X64-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    fldl -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    retq
-  %1 = load double, double* %val, align 8
-  %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %1,
+  %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %a,
                                               metadata !"fpexcept.strict") #0
-  store x86_fp80 %res, x86_fp80* %ret, align 16
-  ret void
+  ret x86_fp80 %ret
 }

-define void @fptrunc_fp80_to_f32(x86_fp80* %val, float *%ret) nounwind strictfp {
+define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp {
 ; X86-LABEL: fptrunc_fp80_to_f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstps (%esp)
 ; X86-NEXT:    flds (%esp)
-; X86-NEXT:    fstps (%eax)
 ; X86-NEXT:    popl %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fptrunc_fp80_to_f32:
 ; X64:       # %bb.0:
-; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstps -{{[0-9]+}}(%rsp)
-; X64-NEXT:    flds -{{[0-9]+}}(%rsp)
-; X64-NEXT:    fstps (%rsi)
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    retq
-  %1 = load x86_fp80, x86_fp80* %val, align 16
-  %res = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %1,
+  %ret = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %a,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
-  store float %res, float* %ret, align 4
-  ret void
+  ret float %ret
 }

-define void @fptrunc_fp80_to_f64(x86_fp80* %val, double* %ret) nounwind strictfp {
+define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp {
 ; X86-LABEL: fptrunc_fp80_to_f64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movl 12(%ebp), %eax
-; X86-NEXT:    movl 8(%ebp), %ecx
-; X86-NEXT:    fldt (%ecx)
+; X86-NEXT:    fldt 8(%ebp)
 ; X86-NEXT:    fstpl (%esp)
 ; X86-NEXT:    fldl (%esp)
-; X86-NEXT:    fstpl (%eax)
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fptrunc_fp80_to_f64:
 ; X64:       # %bb.0:
-; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpl -{{[0-9]+}}(%rsp)
-; X64-NEXT:    fldl -{{[0-9]+}}(%rsp)
-; X64-NEXT:    fstpl (%rsi)
+; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X64-NEXT:    retq
-  %1 = load x86_fp80, x86_fp80* %val, align 16
-  %res = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %1,
+  %ret = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %a,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
-  store double %res, double* %ret, align 8
-  ret void
+  ret double %ret
 }

-define void @fsqrt_fp80(x86_fp80* %a) nounwind strictfp {
+define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp {
 ; X86-LABEL: fsqrt_fp80:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    fldt (%eax)
+; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fsqrt
-; X86-NEXT:    fstpt (%eax)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fsqrt_fp80:
 ; X64:       # %bb.0:
-; X64-NEXT:    fldt (%rdi)
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fsqrt
-; X64-NEXT:    fstpt (%rdi)
 ; X64-NEXT:    retq
-  %1 = load x86_fp80, x86_fp80* %a, align 16
-  %res = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %1,
+  %ret = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %a,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
-  store x86_fp80 %res, x86_fp80* %a, align 16
-  ret void
+  ret x86_fp80 %ret
 }

 attributes #0 = { strictfp }
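
Note (not part of the patch): a minimal sketch of the kind of input this change affects, modeled on the fadd tests above. The function name fadd_fp80 and the file name sample.ll are illustrative only; the intrinsic and the -disable-strictnode-mutation option are taken from the test files in this diff. With the f80 strict nodes kept legal and the any_* patterns in place, the expectation is that the constrained call below is selected directly to x87 stack code (fldt/faddp style, as in the X87 check lines above) instead of being mutated to a non-strict fadd by mutateStrictFPToFP:

  ; llc < sample.ll -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation
  define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
    %ret = call x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80 %a, x86_fp80 %b,
                                                                      metadata !"round.dynamic",
                                                                      metadata !"fpexcept.strict") #0
    ret x86_fp80 %ret
  }

  declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)

  attributes #0 = { strictfp }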