diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4756,6 +4756,7 @@ break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: + case ISD::STRICT_FROUND: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FLOG: diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1961,7 +1961,7 @@ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); setOperationAction(ISD::FROUND, MVT::f16, Custom); - setOperationAction(ISD::STRICT_FROUND, MVT::f16, Custom); + setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote); setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal); setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); @@ -22443,10 +22443,6 @@ /// compiling with trapping math, we can emulate this with /// floor(X + copysign(nextafter(0.5, 0.0), X)). static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) { - if (Op.getOpcode() == ISD::STRICT_FROUND && - Op.getSimpleValueType() == MVT::f16) - report_fatal_error("For now cannot emit strict round(fp16) at backend for " - "lacking library support."); SDValue N0 = Op.getOperand(0); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); @@ -31245,7 +31241,6 @@ case ISD::STORE: return LowerStore(Op, Subtarget, DAG); case ISD::FADD: case ISD::FSUB: return lowerFaddFsub(Op, DAG); - case ISD::STRICT_FROUND: case ISD::FROUND: return LowerFROUND(Op, DAG); case ISD::FABS: case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll --- a/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll @@ -8,6 +8,7 @@ declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.roundeven.f16(half, metadata) +declare half @llvm.experimental.constrained.round.f16(half, metadata) define half @fceil32(half %f) #0 { ; X86-LABEL: fceil32: @@ -102,4 +103,37 @@ ret half %res } +define half @fround16(half %f) #0 { +; X86-LABEL: fround16: +; X86: # %bb.0: +; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: calll roundf +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: wait +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: fround16: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 +; X64-NEXT: callq roundf@PLT +; X64-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + + %res = call half @llvm.experimental.constrained.round.f16( + half %f, metadata !"fpexcept.strict") #0 + ret half %res +} + attributes #0 = { strictfp }