Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -427,6 +427,9 @@
     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
+    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
+    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
+                       Subtarget.isSoftFPABI() ? LibCall : Custom);
 
     if (Subtarget.hasStdExtZfa())
       setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
@@ -461,6 +464,9 @@
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
+    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
+    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
+                       Subtarget.isSoftFPABI() ? LibCall : Custom);
   }
 
   if (Subtarget.is64Bit()) {
@@ -4796,6 +4802,35 @@
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
+  case ISD::FP_TO_BF16: {
+    // Custom lower to ensure the libcall return is passed in an FPR on hard
+    // float ABIs.
+    assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
+    SDLoc DL(Op);
+    MakeLibCallOptions CallOptions;
+    RTLIB::Libcall LC =
+        RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
+    SDValue Res =
+        makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+    if (Subtarget.is64Bit())
+      return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
+    return DAG.getBitcast(MVT::i32, Res);
+  }
+  case ISD::BF16_TO_FP: {
+    assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
+    MVT VT = Op.getSimpleValueType();
+    SDLoc DL(Op);
+    Op = DAG.getNode(ISD::SHL, DL, Op.getOperand(0).getValueType(),
+                     Op.getOperand(0),
+                     DAG.getShiftAmountConstant(16, Subtarget.getXLenVT(), DL));
+    SDValue Res = Subtarget.is64Bit()
+                      ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
+                      : DAG.getBitcast(MVT::f32, Op);
+    // fp_extend if the target VT is bigger than f32.
+    if (VT != MVT::f32)
+      return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
+    return Res;
+  }
   case ISD::FTRUNC:
   case ISD::FCEIL:
   case ISD::FFLOOR:
@@ -16094,9 +16129,10 @@
     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
   bool IsABIRegCopy = CC.has_value();
   EVT ValueVT = Val.getValueType();
-  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
-    // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
-    // and cast to f32.
+  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
+      PartVT == MVT::f32) {
+    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
+    // nan, and cast to f32.
     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
@@ -16147,13 +16183,14 @@
     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
     unsigned NumParts, MVT PartVT, EVT ValueVT,
     std::optional<CallingConv::ID> CC) const {
   bool IsABIRegCopy = CC.has_value();
-  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
+      PartVT == MVT::f32) {
     SDValue Val = Parts[0];
-    // Cast the f32 to i32, truncate to i16, and cast back to f16.
+    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
-    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
+    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
     return Val;
   }
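As a reference for reviewers, here is the scalar computation behind the new BF16_TO_FP lowering, written as a minimal host-side C++ sketch. The helper name is hypothetical and C++20 std::bit_cast is assumed; this is not code from the patch.

  #include <bit>
  #include <cstdint>

  // ISD::BF16_TO_FP: a bf16 value is exactly the high 16 bits of the f32
  // holding the same value, so the extension is a 16-bit left shift plus a
  // bitcast. This matches the SHL feeding FMV_W_X_RV64 (RV64) or a plain
  // bitcast (RV32) in the lowering above.
  inline float BF16BitsToFloat(uint16_t Bits) {
    return std::bit_cast<float>(static_cast<uint32_t>(Bits) << 16);
  }

There is no such shortcut for ISD::FP_TO_BF16, since the narrowing needs real rounding logic; the lowering therefore emits the RTLIB::getFPROUND libcall (__truncsfbf2 or __truncdfbf2) and only customizes how the f32-typed result is read back out of the return register.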
Index: llvm/test/CodeGen/RISCV/bfloat.ll
===================================================================
--- llvm/test/CodeGen/RISCV/bfloat.ll
+++ llvm/test/CodeGen/RISCV/bfloat.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64
-
-; TODO: Enable codegen for hard float.
+; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32
+; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64
+; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32D
+; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64D
 
 define bfloat @float_to_bfloat(float %a) nounwind {
 ; RV32I-ILP32-LABEL: float_to_bfloat:
@@ -22,6 +24,54 @@
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: float_to_bfloat:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32-NEXT:    lui a1, 1048560
+; RV32ID-ILP32-NEXT:    or a0, a0, a1
+; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: float_to_bfloat:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    addi sp, sp, -16
+; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64-NEXT:    lui a1, 1048560
+; RV64ID-LP64-NEXT:    or a0, a0, a1
+; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64-NEXT:    addi sp, sp, 16
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: float_to_bfloat:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32D-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    lui a1, 1048560
+; RV32ID-ILP32D-NEXT:    or a0, a0, a1
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
+; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: float_to_bfloat:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    addi sp, sp, -16
+; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64D-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    lui a1, 1048560
+; RV64ID-LP64D-NEXT:    or a0, a0, a1
+; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
+; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64D-NEXT:    addi sp, sp, 16
+; RV64ID-LP64D-NEXT:    ret
   %1 = fptrunc float %a to bfloat
   ret bfloat %1
 }
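The ILP32D/LP64D output above shows the new ABI handling: __truncsfbf2 returns its result in fa0, so the value is moved to a GPR, padded with ones, and moved back. A minimal sketch of that boxing, with hypothetical helper names (lui a1, 1048560 materializes 1048560 << 12 == 0xFFFF0000):

  #include <bit>
  #include <cstdint>

  // Mirrors splitValueIntoRegisterParts: pad the upper half with ones so
  // the containing f32 is a NaN, as required for narrower FP values held
  // in wider RISC-V FP registers.
  inline float BoxBF16InF32(uint16_t Bits) {
    return std::bit_cast<float>(0xFFFF0000u | Bits);
  }

  // Mirrors joinRegisterPartsIntoValue: bitcast to i32, truncate to i16.
  inline uint16_t UnboxBF16FromF32(float F) {
    return static_cast<uint16_t>(std::bit_cast<uint32_t>(F));
  }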
@@ -44,6 +94,54 @@
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: double_to_bfloat:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT:    call __truncdfbf2@plt
+; RV32ID-ILP32-NEXT:    lui a1, 1048560
+; RV32ID-ILP32-NEXT:    or a0, a0, a1
+; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: double_to_bfloat:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    addi sp, sp, -16
+; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64-NEXT:    call __truncdfbf2@plt
+; RV64ID-LP64-NEXT:    lui a1, 1048560
+; RV64ID-LP64-NEXT:    or a0, a0, a1
+; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64-NEXT:    addi sp, sp, 16
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: double_to_bfloat:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32D-NEXT:    call __truncdfbf2@plt
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    lui a1, 1048560
+; RV32ID-ILP32D-NEXT:    or a0, a0, a1
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
+; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: double_to_bfloat:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    addi sp, sp, -16
+; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64D-NEXT:    call __truncdfbf2@plt
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    lui a1, 1048560
+; RV64ID-LP64D-NEXT:    or a0, a0, a1
+; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
+; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64D-NEXT:    addi sp, sp, 16
+; RV64ID-LP64D-NEXT:    ret
   %1 = fptrunc double %a to bfloat
   ret bfloat %1
 }
@@ -58,6 +156,34 @@
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    slliw a0, a0, 16
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: bfloat_to_float:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: bfloat_to_float:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    slli a0, a0, 48
+; RV64ID-LP64-NEXT:    srli a0, a0, 48
+; RV64ID-LP64-NEXT:    slli a0, a0, 16
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: bfloat_to_float:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: bfloat_to_float:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    slli a0, a0, 48
+; RV64ID-LP64D-NEXT:    srli a0, a0, 48
+; RV64ID-LP64D-NEXT:    slli a0, a0, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
+; RV64ID-LP64D-NEXT:    ret
   %1 = fpext bfloat %a to float
   ret float %1
 }
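On RV64 the fpext path masks before shifting (slli 48 / srli 48 ahead of the slli 16). The extra shifts appear to clear the undefined upper bits of the any-extended i16 before the f32 bit image is formed; a hypothetical sketch of the arithmetic:

  #include <cstdint>

  inline uint32_t BF16BitsToF32BitsRV64(uint64_t AnyExt) {
    uint64_t ZExt = (AnyExt << 48) >> 48;     // slli a0, a0, 48 ; srli a0, a0, 48
    return static_cast<uint32_t>(ZExt << 16); // slli a0, a0, 16
  }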
@@ -82,6 +208,46 @@
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: bfloat_to_double:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
+; RV32ID-ILP32-NEXT:    fcvt.d.s fa5, fa5
+; RV32ID-ILP32-NEXT:    fsd fa5, 8(sp)
+; RV32ID-ILP32-NEXT:    lw a0, 8(sp)
+; RV32ID-ILP32-NEXT:    lw a1, 12(sp)
+; RV32ID-ILP32-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: bfloat_to_double:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    slli a0, a0, 48
+; RV64ID-LP64-NEXT:    srli a0, a0, 48
+; RV64ID-LP64-NEXT:    slli a0, a0, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
+; RV64ID-LP64-NEXT:    fcvt.d.s fa5, fa5
+; RV64ID-LP64-NEXT:    fmv.x.d a0, fa5
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: bfloat_to_double:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a0
+; RV32ID-ILP32D-NEXT:    fcvt.d.s fa0, fa5
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: bfloat_to_double:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    slli a0, a0, 48
+; RV64ID-LP64D-NEXT:    srli a0, a0, 48
+; RV64ID-LP64D-NEXT:    slli a0, a0, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a0
+; RV64ID-LP64D-NEXT:    fcvt.d.s fa0, fa5
+; RV64ID-LP64D-NEXT:    ret
   %1 = fpext bfloat %a to double
   ret double %1
 }
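bfloat to double is lowered in two steps: rebuild the f32 image, then perform an ordinary single-to-double extension (fcvt.d.s). Both steps are exact, since every bf16 value is representable in f32 and f64, so no double rounding can occur. A sketch under the same assumptions as the earlier examples:

  #include <bit>
  #include <cstdint>

  inline double BF16BitsToDouble(uint16_t Bits) {
    float F = std::bit_cast<float>(static_cast<uint32_t>(Bits) << 16);
    return static_cast<double>(F); // fcvt.d.s
  }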
@@ -94,6 +260,32 @@
 ; RV64I-LP64-LABEL: i16_to_bfloat:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: i16_to_bfloat:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    lui a1, 1048560
+; RV32ID-ILP32-NEXT:    or a0, a0, a1
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: i16_to_bfloat:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    lui a1, 1048560
+; RV64ID-LP64-NEXT:    or a0, a0, a1
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: i16_to_bfloat:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    lui a1, 1048560
+; RV32ID-ILP32D-NEXT:    or a0, a0, a1
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: i16_to_bfloat:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    lui a1, 1048560
+; RV64ID-LP64D-NEXT:    or a0, a0, a1
+; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
+; RV64ID-LP64D-NEXT:    ret
   %1 = bitcast i16 %a to bfloat
   ret bfloat %1
 }
@@ -106,6 +298,24 @@
 ; RV64I-LP64-LABEL: bfloat_to_i16:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: bfloat_to_i16:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: bfloat_to_i16:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: bfloat_to_i16:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: bfloat_to_i16:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    ret
   %1 = bitcast bfloat %a to i16
   ret i16 %1
 }
@@ -134,6 +344,88 @@
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: bfloat_add:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT:    slli a1, a1, 16
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a1
+; RV32ID-ILP32-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
+; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
+; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
+; RV32ID-ILP32-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32-NEXT:    lui a1, 1048560
+; RV32ID-ILP32-NEXT:    or a0, a0, a1
+; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: bfloat_add:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    addi sp, sp, -16
+; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64-NEXT:    lui a2, 16
+; RV64ID-LP64-NEXT:    addiw a2, a2, -1
+; RV64ID-LP64-NEXT:    and a0, a0, a2
+; RV64ID-LP64-NEXT:    and a1, a1, a2
+; RV64ID-LP64-NEXT:    slli a1, a1, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa5, a1
+; RV64ID-LP64-NEXT:    slli a0, a0, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
+; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
+; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
+; RV64ID-LP64-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64-NEXT:    lui a1, 1048560
+; RV64ID-LP64-NEXT:    or a0, a0, a1
+; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64-NEXT:    addi sp, sp, 16
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: bfloat_add:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    fmv.x.w a1, fa1
+; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
+; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
+; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
+; RV32ID-ILP32D-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    lui a1, 1048560
+; RV32ID-ILP32D-NEXT:    or a0, a0, a1
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
+; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: bfloat_add:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    addi sp, sp, -16
+; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    lui a1, 16
+; RV64ID-LP64D-NEXT:    addiw a1, a1, -1
+; RV64ID-LP64D-NEXT:    and a0, a0, a1
+; RV64ID-LP64D-NEXT:    fmv.x.w a2, fa1
+; RV64ID-LP64D-NEXT:    and a1, a2, a1
+; RV64ID-LP64D-NEXT:    slli a1, a1, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
+; RV64ID-LP64D-NEXT:    slli a0, a0, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
+; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
+; RV64ID-LP64D-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    lui a1, 1048560
+; RV64ID-LP64D-NEXT:    or a0, a0, a1
+; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
+; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64D-NEXT:    addi sp, sp, 16
+; RV64ID-LP64D-NEXT:    ret
   %1 = fadd bfloat %a, %b
   ret bfloat %1
 }
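The fadd checks illustrate how bf16 arithmetic is legalized: both operands are extended to f32 in registers, added with fadd.s, and the sum is rounded back down with a single __truncsfbf2 call. A hypothetical sketch (the libcall is declared with an integer result purely for illustration; the real helper returns a bf16-typed value):

  #include <bit>
  #include <cstdint>

  extern "C" uint16_t __truncsfbf2(float); // compiler-rt/libgcc helper

  inline uint16_t BF16Add(uint16_t A, uint16_t B) {
    float FA = std::bit_cast<float>(static_cast<uint32_t>(A) << 16);
    float FB = std::bit_cast<float>(static_cast<uint32_t>(B) << 16);
    return __truncsfbf2(FA + FB); // one rounding, at the end
  }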
@@ -166,6 +458,84 @@
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: bfloat_load:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT:    lhu a1, 6(a0)
+; RV32ID-ILP32-NEXT:    lhu a0, 0(a0)
+; RV32ID-ILP32-NEXT:    slli a1, a1, 16
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a1
+; RV32ID-ILP32-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
+; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
+; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
+; RV32ID-ILP32-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32-NEXT:    lui a1, 1048560
+; RV32ID-ILP32-NEXT:    or a0, a0, a1
+; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: bfloat_load:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    addi sp, sp, -16
+; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64-NEXT:    lhu a1, 6(a0)
+; RV64ID-LP64-NEXT:    lhu a0, 0(a0)
+; RV64ID-LP64-NEXT:    slli a1, a1, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa5, a1
+; RV64ID-LP64-NEXT:    slli a0, a0, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
+; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
+; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
+; RV64ID-LP64-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64-NEXT:    lui a1, 1048560
+; RV64ID-LP64-NEXT:    or a0, a0, a1
+; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64-NEXT:    addi sp, sp, 16
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: bfloat_load:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32D-NEXT:    lhu a1, 6(a0)
+; RV32ID-ILP32D-NEXT:    lhu a0, 0(a0)
+; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
+; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
+; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
+; RV32ID-ILP32D-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    lui a1, 1048560
+; RV32ID-ILP32D-NEXT:    or a0, a0, a1
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
+; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: bfloat_load:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    addi sp, sp, -16
+; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64D-NEXT:    lhu a1, 6(a0)
+; RV64ID-LP64D-NEXT:    lhu a0, 0(a0)
+; RV64ID-LP64D-NEXT:    slli a1, a1, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
+; RV64ID-LP64D-NEXT:    slli a0, a0, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
+; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
+; RV64ID-LP64D-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    lui a1, 1048560
+; RV64ID-LP64D-NEXT:    or a0, a0, a1
+; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
+; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64D-NEXT:    addi sp, sp, 16
+; RV64ID-LP64D-NEXT:    ret
   %1 = load bfloat, ptr %a
   %2 = getelementptr bfloat, ptr %a, i32 3
   %3 = load bfloat, ptr %2
@@ -207,6 +577,98 @@
 ; RV64I-LP64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
+;
+; RV32ID-ILP32-LABEL: bfloat_store:
+; RV32ID-ILP32:       # %bb.0:
+; RV32ID-ILP32-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT:    mv s0, a0
+; RV32ID-ILP32-NEXT:    slli a2, a2, 16
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a2
+; RV32ID-ILP32-NEXT:    slli a1, a1, 16
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
+; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
+; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
+; RV32ID-ILP32-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32-NEXT:    sh a0, 0(s0)
+; RV32ID-ILP32-NEXT:    sh a0, 16(s0)
+; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32-NEXT:    ret
+;
+; RV64ID-LP64-LABEL: bfloat_store:
+; RV64ID-LP64:       # %bb.0:
+; RV64ID-LP64-NEXT:    addi sp, sp, -16
+; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64ID-LP64-NEXT:    mv s0, a0
+; RV64ID-LP64-NEXT:    lui a0, 16
+; RV64ID-LP64-NEXT:    addiw a0, a0, -1
+; RV64ID-LP64-NEXT:    and a1, a1, a0
+; RV64ID-LP64-NEXT:    and a0, a2, a0
+; RV64ID-LP64-NEXT:    slli a0, a0, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
+; RV64ID-LP64-NEXT:    slli a1, a1, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
+; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
+; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
+; RV64ID-LP64-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64-NEXT:    sh a0, 0(s0)
+; RV64ID-LP64-NEXT:    sh a0, 16(s0)
+; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64ID-LP64-NEXT:    addi sp, sp, 16
+; RV64ID-LP64-NEXT:    ret
+;
+; RV32ID-ILP32D-LABEL: bfloat_store:
+; RV32ID-ILP32D:       # %bb.0:
+; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
+; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32D-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-ILP32D-NEXT:    mv s0, a0
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    fmv.x.w a1, fa1
+; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
+; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
+; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
+; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
+; RV32ID-ILP32D-NEXT:    call __truncsfbf2@plt
+; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
+; RV32ID-ILP32D-NEXT:    sh a0, 0(s0)
+; RV32ID-ILP32D-NEXT:    sh a0, 16(s0)
+; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32D-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
+; RV32ID-ILP32D-NEXT:    ret
+;
+; RV64ID-LP64D-LABEL: bfloat_store:
+; RV64ID-LP64D:       # %bb.0:
+; RV64ID-LP64D-NEXT:    addi sp, sp, -16
+; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-LP64D-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64ID-LP64D-NEXT:    mv s0, a0
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    lui a1, 16
+; RV64ID-LP64D-NEXT:    addiw a1, a1, -1
+; RV64ID-LP64D-NEXT:    and a0, a0, a1
+; RV64ID-LP64D-NEXT:    fmv.x.w a2, fa1
+; RV64ID-LP64D-NEXT:    and a1, a2, a1
+; RV64ID-LP64D-NEXT:    slli a1, a1, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
+; RV64ID-LP64D-NEXT:    slli a0, a0, 16
+; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
+; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
+; RV64ID-LP64D-NEXT:    call __truncsfbf2@plt
+; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
+; RV64ID-LP64D-NEXT:    sh a0, 0(s0)
+; RV64ID-LP64D-NEXT:    sh a0, 16(s0)
+; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-LP64D-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64ID-LP64D-NEXT:    addi sp, sp, 16
+; RV64ID-LP64D-NEXT:    ret
   %1 = fadd bfloat %b, %c
   store bfloat %1, ptr %a
   %2 = getelementptr bfloat, ptr %a, i32 8