Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3591,6 +3591,17 @@ ReplaceValueWith(SDValue(N, 1), Chain); } +// Emit FP_EXTEND of Op to VT, or STRICT_FP_EXTEND (updating Chain) if IsStrict. +static SDValue fpExtendHelper(SDValue Op, SDValue &Chain, bool IsStrict, EVT VT, + const SDLoc &DL, SelectionDAG &DAG) { + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}); + Chain = Op.getValue(1); + return Op; + } + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Op); +} + void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -3611,6 +3622,11 @@ return; } + if (Op.getValueType() == MVT::bf16) { + // Extend to f32 as there is no bf16 libcall. + Op = fpExtendHelper(Op, Chain, IsStrict, MVT::f32, dl, DAG); + } + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -3643,6 +3659,11 @@ return; } + if (Op.getValueType() == MVT::bf16) { + // Extend to f32 as there is no bf16 libcall. + Op = fpExtendHelper(Op, Chain, IsStrict, MVT::f32, dl, DAG); + } + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -3673,14 +3694,9 @@ EVT VT = Op.getValueType(); if (VT == MVT::f16) { - VT = MVT::f32; // Extend to f32. 
- if (IsStrict) { - Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op}); - Chain = Op.getValue(1); - } else { - Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op); - } + VT = MVT::f32; + Op = fpExtendHelper(Op, Chain, IsStrict, VT, dl, DAG); } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; Index: llvm/test/CodeGen/RISCV/bfloat-convert.ll =================================================================== --- llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -215,31 +215,180 @@ } declare i32 @llvm.fptoui.sat.i32.bf16(bfloat) -; TODO: The following tests error on rv32. - -; define i64 @fcvt_l_bf16(bfloat %a) nounwind { -; %1 = fptosi bfloat %a to i64 -; ret i64 %1 -; } +define i64 @fcvt_l_bf16(bfloat %a) nounwind { +; CHECK32-LABEL: fcvt_l_bf16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-NEXT: fcvt.s.bf16 fa0, fa0 +; CHECK32-NEXT: call __fixsfdi@plt +; CHECK32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: fcvt_l_bf16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: fcvt.s.bf16 fa5, fa0, rne +; CHECK64-NEXT: fcvt.l.s a0, fa5, rtz +; CHECK64-NEXT: ret + %1 = fptosi bfloat %a to i64 + ret i64 %1 +} -; define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { -; start: -; %0 = tail call i64 @llvm.fptosi.sat.i64.bf16(bfloat %a) -; ret i64 %0 -; } -; declare i64 @llvm.fptosi.sat.i64.bf16(bfloat) +define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { +; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat: +; RV32IZFBFMIN: # %bb.0: # %start +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; RV32IZFBFMIN-NEXT: lui a0, 913408 +; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0 +; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0 +; 
RV32IZFBFMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFBFMIN-NEXT: call __fixsfdi@plt +; RV32IZFBFMIN-NEXT: lui a4, 524288 +; RV32IZFBFMIN-NEXT: lui a2, 524288 +; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %start +; RV32IZFBFMIN-NEXT: mv a2, a1 +; RV32IZFBFMIN-NEXT: .LBB10_2: # %start +; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) +; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4 +; RV32IZFBFMIN-NEXT: # %bb.3: +; RV32IZFBFMIN-NEXT: addi a2, a4, -1 +; RV32IZFBFMIN-NEXT: .LBB10_4: # %start +; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFBFMIN-NEXT: neg a4, a1 +; RV32IZFBFMIN-NEXT: and a1, a4, a2 +; RV32IZFBFMIN-NEXT: neg a2, a3 +; RV32IZFBFMIN-NEXT: neg a3, s0 +; RV32IZFBFMIN-NEXT: and a0, a3, a0 +; RV32IZFBFMIN-NEXT: or a0, a2, a0 +; RV32IZFBFMIN-NEXT: and a0, a4, a0 +; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: ret +; +; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat: +; R32IDZFBFMIN: # %bb.0: # %start +; R32IDZFBFMIN-NEXT: addi sp, sp, -16 +; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 +; R32IDZFBFMIN-NEXT: lui a0, 913408 +; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0 +; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0 +; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0 +; R32IDZFBFMIN-NEXT: call __fixsfdi@plt +; R32IDZFBFMIN-NEXT: lui a4, 524288 +; R32IDZFBFMIN-NEXT: lui a2, 524288 +; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2 +; R32IDZFBFMIN-NEXT: # %bb.1: # %start +; R32IDZFBFMIN-NEXT: mv a2, a1 +; R32IDZFBFMIN-NEXT: .LBB10_2: # %start +; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) +; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; R32IDZFBFMIN-NEXT: flt.s a3, 
fa5, fs0 +; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4 +; R32IDZFBFMIN-NEXT: # %bb.3: +; R32IDZFBFMIN-NEXT: addi a2, a4, -1 +; R32IDZFBFMIN-NEXT: .LBB10_4: # %start +; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0 +; R32IDZFBFMIN-NEXT: neg a4, a1 +; R32IDZFBFMIN-NEXT: and a1, a4, a2 +; R32IDZFBFMIN-NEXT: neg a2, a3 +; R32IDZFBFMIN-NEXT: neg a3, s0 +; R32IDZFBFMIN-NEXT: and a0, a3, a0 +; R32IDZFBFMIN-NEXT: or a0, a2, a0 +; R32IDZFBFMIN-NEXT: and a0, a4, a0 +; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; R32IDZFBFMIN-NEXT: addi sp, sp, 16 +; R32IDZFBFMIN-NEXT: ret +; +; CHECK64-LABEL: fcvt_l_bf16_sat: +; CHECK64: # %bb.0: # %start +; CHECK64-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK64-NEXT: fcvt.l.s a0, fa5, rtz +; CHECK64-NEXT: feq.s a1, fa5, fa5 +; CHECK64-NEXT: seqz a1, a1 +; CHECK64-NEXT: addi a1, a1, -1 +; CHECK64-NEXT: and a0, a1, a0 +; CHECK64-NEXT: ret +start: + %0 = tail call i64 @llvm.fptosi.sat.i64.bf16(bfloat %a) + ret i64 %0 +} +declare i64 @llvm.fptosi.sat.i64.bf16(bfloat) -; define i64 @fcvt_lu_bf16(bfloat %a) nounwind { -; %1 = fptoui bfloat %a to i64 -; ret i64 %1 -; } +define i64 @fcvt_lu_bf16(bfloat %a) nounwind { +; CHECK32-LABEL: fcvt_lu_bf16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-NEXT: fcvt.s.bf16 fa0, fa0 +; CHECK32-NEXT: call __fixunssfdi@plt +; CHECK32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: fcvt_lu_bf16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: fcvt.s.bf16 fa5, fa0, rne +; CHECK64-NEXT: fcvt.lu.s a0, fa5, rtz +; CHECK64-NEXT: ret + %1 = fptoui bfloat %a to i64 + ret i64 %1 +} -; define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { -; start: -; %0 = tail call i64 @llvm.fptoui.sat.i64.bf16(bfloat %a) -; ret i64 %0 -; } -; declare i64 @llvm.fptoui.sat.i64.bf16(bfloat) +define i64 
@fcvt_lu_bf16_sat(bfloat %a) nounwind { +; CHECK32-LABEL: fcvt_lu_bf16_sat: +; CHECK32: # %bb.0: # %start +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; CHECK32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; CHECK32-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK32-NEXT: flw fa5, %lo(.LCPI12_0)(a0) +; CHECK32-NEXT: fcvt.s.bf16 fa0, fa0 +; CHECK32-NEXT: flt.s a0, fa5, fa0 +; CHECK32-NEXT: neg s0, a0 +; CHECK32-NEXT: fmv.w.x fa5, zero +; CHECK32-NEXT: fle.s a0, fa5, fa0 +; CHECK32-NEXT: neg s1, a0 +; CHECK32-NEXT: call __fixunssfdi@plt +; CHECK32-NEXT: and a0, s1, a0 +; CHECK32-NEXT: or a0, s0, a0 +; CHECK32-NEXT: and a1, s1, a1 +; CHECK32-NEXT: or a1, s0, a1 +; CHECK32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; CHECK32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: fcvt_lu_bf16_sat: +; CHECK64: # %bb.0: # %start +; CHECK64-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK64-NEXT: fcvt.lu.s a0, fa5, rtz +; CHECK64-NEXT: feq.s a1, fa5, fa5 +; CHECK64-NEXT: seqz a1, a1 +; CHECK64-NEXT: addi a1, a1, -1 +; CHECK64-NEXT: and a0, a1, a0 +; CHECK64-NEXT: ret +start: + %0 = tail call i64 @llvm.fptoui.sat.i64.bf16(bfloat %a) + ret i64 %0 +} +declare i64 @llvm.fptoui.sat.i64.bf16(bfloat) define bfloat @fcvt_bf16_si(i16 %a) nounwind { ; CHECK32-LABEL: fcvt_bf16_si: