diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -106,6 +106,7 @@
     case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
     case ISD::STRICT_FP_ROUND:
     case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+    case ISD::STRICT_FP16_TO_FP:
     case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
     case ISD::STRICT_FPOW:
     case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
@@ -539,10 +540,12 @@
 // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
 // nodes?
 SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
+  bool IsStrict = N->isStrictFPOpcode();
   EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
-  SDValue Op = N->getOperand(0);
+  SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+  EVT OpsVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
   CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
   SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
                                   CallOptions, SDLoc(N)).first;
@@ -552,7 +555,11 @@
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
-  return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
+  std::pair<SDValue, SDValue> Tmp =
+      TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N), Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  return Tmp.first;
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -2905,6 +2912,8 @@
   case ISD::BITCAST:    Res = SoftPromoteHalfOp_BITCAST(N); break;
   case ISD::FCOPYSIGN:  Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
   case ISD::FP_TO_SINT_SAT:
@@ -2967,12 +2976,22 @@
 }
 
 SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
-  SDValue Op = N->getOperand(0);
+  bool IsStrict = N->isStrictFPOpcode();
+  SDValue Op = N->getOperand(IsStrict ? 1 : 0);
   SDLoc dl(N);
 
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
 
   Op = GetSoftPromotedHalf(Op);
 
+  if (IsStrict) {
+    SDValue Res = DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
+                              {NVT, MVT::Other}, {N->getOperand(0), Op});
+    Res = DAG.getNode(N->getOpcode(), dl, {N->getValueType(0), MVT::Other},
+                      {N->getOperand(0), Res});
+    ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+    ReplaceValueWith(SDValue(N, 0), Res);
+    return SDValue();
+  }
+
   SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
diff --git a/llvm/test/CodeGen/X86/fp16-promote-strict.ll b/llvm/test/CodeGen/X86/fp16-promote-strict.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp16-promote-strict.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefixes=X64-SSE
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefixes=X64-SSE
+; RUN: llc < %s -mtriple=x86_64-linux-android -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64-AVX512
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64-AVX512
+; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=-sse | FileCheck %s --check-prefixes=X86
+
+; Check the SoftPromoteHalfOperand and SoftenFloatResult code paths.
+
+define i8 @fptosi_i8_half(half %x) nounwind strictfp {
+; X64-SSE-LABEL: fptosi_i8_half:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    callq __extendhfsf2@PLT
+; X64-SSE-NEXT:    cvttss2si %xmm0, %eax
+; X64-SSE-NEXT:    # kill: def $al killed $al killed $eax
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX512-LABEL: fptosi_i8_half:
+; X64-AVX512:       # %bb.0: # %entry
+; X64-AVX512-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-AVX512-NEXT:    # kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT:    retq
+;
+; X86-LABEL: fptosi_i8_half:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl $3072, %eax # imm = 0xC00
+; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-NEXT:    fistps {{[0-9]+}}(%esp)
+; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+entry:
+  %conv = call i8 @llvm.experimental.constrained.fptosi.i8.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i8 %conv
+}
+
+define i16 @fptosi_i16_half(half %x) nounwind strictfp {
+; X64-SSE-LABEL: fptosi_i16_half:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    callq __extendhfsf2@PLT
+; X64-SSE-NEXT:    cvttss2si %xmm0, %eax
+; X64-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX512-LABEL: fptosi_i16_half:
+; X64-AVX512:       # %bb.0: # %entry
+; X64-AVX512-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT:    retq
+;
+; X86-LABEL: fptosi_i16_half:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl $3072, %eax # imm = 0xC00
+; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-NEXT:    fistps {{[0-9]+}}(%esp)
+; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
entry:
+  %conv = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i16 %conv
+}
+
+attributes #0 = { strictfp }
+
+declare i8 @llvm.experimental.constrained.fptosi.i8.f16(half, metadata)
+declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata)