Index: compiler-rt/lib/builtins/CMakeLists.txt =================================================================== --- compiler-rt/lib/builtins/CMakeLists.txt +++ compiler-rt/lib/builtins/CMakeLists.txt @@ -193,6 +193,7 @@ # We only build BF16 files when "__bf16" is available. set(BF16_SOURCES + extendbfsf2.c truncdfbf2.c truncsfbf2.c ) Index: compiler-rt/lib/builtins/extendbfsf2.c =================================================================== --- /dev/null +++ compiler-rt/lib/builtins/extendbfsf2.c @@ -0,0 +1,13 @@ +//===-- lib/extendbfsf2.c - bfloat -> single conversion -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SRC_BFLOAT +#define DST_SINGLE +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI float __extendbfsf2(src_t a) { return __extendXfYf2__(a); } Index: compiler-rt/lib/builtins/fp_extend.h =================================================================== --- compiler-rt/lib/builtins/fp_extend.h +++ compiler-rt/lib/builtins/fp_extend.h @@ -50,8 +50,15 @@ static const int srcSigBits = 10; #define src_rep_t_clz __builtin_clz +#elif defined SRC_BFLOAT +typedef __bf16 src_t; +typedef uint16_t src_rep_t; +#define SRC_REP_C UINT16_C +static const int srcSigBits = 7; +#define src_rep_t_clz __builtin_clz + #else -#error Source should be half, single, or double precision! +#error Source should be bfloat, half, single, or double precision! #endif // end source precision #if defined DST_SINGLE Index: llvm/include/llvm/IR/RuntimeLibcalls.def =================================================================== --- llvm/include/llvm/IR/RuntimeLibcalls.def +++ llvm/include/llvm/IR/RuntimeLibcalls.def @@ -291,6 +291,7 @@ HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2") HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee") +HANDLE_LIBCALL(FPEXT_BF16_F32, "__extendbfsf2") HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee") HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2") HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2") Index: llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -510,10 +510,11 @@ return BitConvertToInteger(Op); } - // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's - // entirely possible for both f16 and f32 to be legal, so use the fully - // hard-float FP_EXTEND rather than FP16_TO_FP. - if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { + // There's only a libcall for [b]f16 -> f32, so proceed in two stages. Also, + // it's entirely possible for both [b]f16 and f32 to be legal, so use the + // fully hard-float FP_EXTEND rather than {FP16,BF16}_TO_FP. + if ((Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16) && + N->getValueType(0) != MVT::f32) { if (IsStrict) { Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), { MVT::f32, MVT::Other }, { Chain, Op }); Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -240,6 +240,9 @@ return FPEXT_F16_F80; if (RetVT == MVT::f128) return FPEXT_F16_F128; + } else if (OpVT == MVT::bf16) { + if (RetVT == MVT::f32) + return FPEXT_BF16_F32; } else if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; Index: llvm/test/CodeGen/RISCV/bfloat.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/bfloat.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32 +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64 + +; TODO: Enable codegen for hard float. + +define bfloat @float_to_bfloat(float %a) nounwind { +; RV32I-ILP32-LABEL: float_to_bfloat: +; RV32I-ILP32: # %bb.0: +; RV32I-ILP32-NEXT: addi sp, sp, -16 +; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32-NEXT: call __truncsfbf2@plt +; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32-NEXT: addi sp, sp, 16 +; RV32I-ILP32-NEXT: ret +; +; RV64I-LP64-LABEL: float_to_bfloat: +; RV64I-LP64: # %bb.0: +; RV64I-LP64-NEXT: addi sp, sp, -16 +; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64-NEXT: call __truncsfbf2@plt +; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64-NEXT: addi sp, sp, 16 +; RV64I-LP64-NEXT: ret + %1 = fptrunc float %a to bfloat + ret bfloat %1 +} + +define bfloat @double_to_bfloat(double %a) nounwind { +; RV32I-ILP32-LABEL: double_to_bfloat: +; RV32I-ILP32: # %bb.0: +; RV32I-ILP32-NEXT: addi sp, sp, -16 +; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32-NEXT: call __truncdfbf2@plt +; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32-NEXT: addi sp, sp, 16 +; RV32I-ILP32-NEXT: ret +; +; RV64I-LP64-LABEL: double_to_bfloat: +; RV64I-LP64: # %bb.0: +; RV64I-LP64-NEXT: addi sp, sp, -16 +; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64-NEXT: call __truncdfbf2@plt +; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64-NEXT: addi sp, sp, 16 +; RV64I-LP64-NEXT: ret + %1 = fptrunc double %a to bfloat + ret bfloat %1 +} + +define float @bfloat_to_float(bfloat %a) nounwind { +; RV32I-ILP32-LABEL: bfloat_to_float: +; RV32I-ILP32: # %bb.0: +; RV32I-ILP32-NEXT: addi sp, sp, -16 +; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32-NEXT: call __extendbfsf2@plt +; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32-NEXT: addi sp, sp, 16 +; RV32I-ILP32-NEXT: ret +; +; RV64I-LP64-LABEL: bfloat_to_float: +; RV64I-LP64: # %bb.0: +; RV64I-LP64-NEXT: addi sp, sp, -16 +; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64-NEXT: call __extendbfsf2@plt +; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64-NEXT: addi sp, sp, 16 +; RV64I-LP64-NEXT: ret + %1 = fpext bfloat %a to float + ret float %1 +} + +define double @bfloat_to_double(bfloat %a) nounwind { +; RV32I-ILP32-LABEL: bfloat_to_double: +; RV32I-ILP32: # %bb.0: +; RV32I-ILP32-NEXT: addi sp, sp, -16 +; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32-NEXT: call __extendbfsf2@plt +; RV32I-ILP32-NEXT: call __extendsfdf2@plt +; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32-NEXT: addi sp, sp, 16 +; RV32I-ILP32-NEXT: ret +; +; RV64I-LP64-LABEL: bfloat_to_double: +; RV64I-LP64: # %bb.0: +; RV64I-LP64-NEXT: addi sp, sp, -16 +; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64-NEXT: call __extendbfsf2@plt +; RV64I-LP64-NEXT: call __extendsfdf2@plt +; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64-NEXT: addi sp, sp, 16 +; RV64I-LP64-NEXT: ret + %1 = fpext bfloat %a to double + ret double %1 +}