Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -5295,6 +5295,11 @@ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); + EVT OrigDstVT = DstVT; + // If converting to bf16, do it by first converting to f32 because there + // aren't direct int to bf16 libcalls. + if (DstVT == MVT::bf16) + DstVT = MVT::f32; RTLIB::Libcall LC = IsSigned ? RTLIB::getSINTTOFP(Op.getValueType(), DstVT) : RTLIB::getUINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && @@ -5304,9 +5309,15 @@ std::pair Tmp = TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); - if (!IsStrict) + if (!IsStrict) { + if (OrigDstVT == MVT::bf16) + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), MVT::bf16, Tmp.first, + DAG.getIntPtrConstant(0, SDLoc(N), /*isTarget=*/true)); return Tmp.first; + } + assert(OrigDstVT != MVT::bf16 && + "Don't know how to do STRICT_XINT_TO_FP with bf16 target"); ReplaceValueWith(SDValue(N, 1), Tmp.second); ReplaceValueWith(SDValue(N, 0), Tmp.first); return SDValue(); Index: llvm/test/CodeGen/RISCV/bfloat-convert.ll =================================================================== --- llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -1127,17 +1127,101 @@ ret bfloat %1 } -; TODO: The following tests error on rv32 with zfbfmin enabled. - -; define bfloat @fcvt_bf16_l(i64 %a) nounwind { -; %1 = sitofp i64 %a to bfloat -; ret bfloat %1 -; } +define bfloat @fcvt_bf16_l(i64 %a) nounwind { +; CHECK32ZFBFMIN-LABEL: fcvt_bf16_l: +; CHECK32ZFBFMIN: # %bb.0: +; CHECK32ZFBFMIN-NEXT: addi sp, sp, -16 +; CHECK32ZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32ZFBFMIN-NEXT: call __floatdisf@plt +; CHECK32ZFBFMIN-NEXT: fcvt.bf16.s fa0, fa0 +; CHECK32ZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32ZFBFMIN-NEXT: addi sp, sp, 16 +; CHECK32ZFBFMIN-NEXT: ret +; +; RV32ID-LABEL: fcvt_bf16_l: +; RV32ID: # %bb.0: +; RV32ID-NEXT: addi sp, sp, -16 +; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ID-NEXT: call __floatdisf@plt +; RV32ID-NEXT: call __truncsfbf2@plt +; RV32ID-NEXT: fmv.x.w a0, fa0 +; RV32ID-NEXT: lui a1, 1048560 +; RV32ID-NEXT: or a0, a0, a1 +; RV32ID-NEXT: fmv.w.x fa0, a0 +; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ID-NEXT: addi sp, sp, 16 +; RV32ID-NEXT: ret +; +; CHECK64ZFBFMIN-LABEL: fcvt_bf16_l: +; CHECK64ZFBFMIN: # %bb.0: +; CHECK64ZFBFMIN-NEXT: fcvt.s.l fa5, a0 +; CHECK64ZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK64ZFBFMIN-NEXT: ret +; +; RV64ID-LABEL: fcvt_bf16_l: +; RV64ID: # %bb.0: +; RV64ID-NEXT: addi sp, sp, -16 +; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ID-NEXT: fcvt.s.l fa0, a0 +; RV64ID-NEXT: call __truncsfbf2@plt +; RV64ID-NEXT: fmv.x.w a0, fa0 +; RV64ID-NEXT: lui a1, 1048560 +; RV64ID-NEXT: or a0, a0, a1 +; RV64ID-NEXT: fmv.w.x fa0, a0 +; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ID-NEXT: addi sp, sp, 16 +; RV64ID-NEXT: ret + %1 = sitofp i64 %a to bfloat + ret bfloat %1 +} -; define bfloat @fcvt_bf16_lu(i64 %a) nounwind { -; %1 = uitofp i64 %a to bfloat -; ret bfloat %1 -; } +define bfloat @fcvt_bf16_lu(i64 %a) nounwind { +; CHECK32ZFBFMIN-LABEL: fcvt_bf16_lu: +; CHECK32ZFBFMIN: # %bb.0: +; CHECK32ZFBFMIN-NEXT: addi sp, sp, -16 +; CHECK32ZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32ZFBFMIN-NEXT: call __floatundisf@plt +; CHECK32ZFBFMIN-NEXT: fcvt.bf16.s fa0, fa0 +; CHECK32ZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32ZFBFMIN-NEXT: addi sp, sp, 16 +; CHECK32ZFBFMIN-NEXT: ret +; +; RV32ID-LABEL: fcvt_bf16_lu: +; RV32ID: # %bb.0: +; RV32ID-NEXT: addi sp, sp, -16 +; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ID-NEXT: call __floatundisf@plt +; RV32ID-NEXT: call __truncsfbf2@plt +; RV32ID-NEXT: fmv.x.w a0, fa0 +; RV32ID-NEXT: lui a1, 1048560 +; RV32ID-NEXT: or a0, a0, a1 +; RV32ID-NEXT: fmv.w.x fa0, a0 +; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ID-NEXT: addi sp, sp, 16 +; RV32ID-NEXT: ret +; +; CHECK64ZFBFMIN-LABEL: fcvt_bf16_lu: +; CHECK64ZFBFMIN: # %bb.0: +; CHECK64ZFBFMIN-NEXT: fcvt.s.lu fa5, a0 +; CHECK64ZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK64ZFBFMIN-NEXT: ret +; +; RV64ID-LABEL: fcvt_bf16_lu: +; RV64ID: # %bb.0: +; RV64ID-NEXT: addi sp, sp, -16 +; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ID-NEXT: fcvt.s.lu fa0, a0 +; RV64ID-NEXT: call __truncsfbf2@plt +; RV64ID-NEXT: fmv.x.w a0, fa0 +; RV64ID-NEXT: lui a1, 1048560 +; RV64ID-NEXT: or a0, a0, a1 +; RV64ID-NEXT: fmv.w.x fa0, a0 +; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ID-NEXT: addi sp, sp, 16 +; RV64ID-NEXT: ret + %1 = uitofp i64 %a to bfloat + ret bfloat %1 +} define bfloat @fcvt_bf16_s(float %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_bf16_s: