diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -368,7 +368,18 @@ if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) setOperationAction(ISD::BITCAST, MVT::i16, Custom); - + + static const unsigned ZfhminZfbfminPromoteOps[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, + ISD::FSUB, ISD::FMUL, ISD::FMA, + ISD::FDIV, ISD::FSQRT, ISD::FABS, + ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, + ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, + ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, + ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, + ISD::FTRUNC, ISD::FRINT, ISD::FROUND, + ISD::FROUNDEVEN, ISD::SELECT}; + if (Subtarget.hasStdExtZfbfmin()) { setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::bf16, Custom); @@ -377,8 +388,12 @@ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); setOperationAction(ISD::ConstantFP, MVT::bf16, Expand); setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand); - setOperationAction(ISD::SELECT, MVT::bf16, Promote); - setOperationAction(ISD::SETCC, MVT::bf16, Promote); + setOperationAction(ISD::BR_CC, MVT::bf16, Expand); + setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote); + setOperationAction(ISD::FREM, MVT::bf16, Promote); + // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the + // DAGCombiner::visitFP_ROUND probably needs improvements first. + setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); } if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) { @@ -389,18 +404,7 @@ setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); } else { - static const unsigned ZfhminPromoteOps[] = { - ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, - ISD::FSUB, ISD::FMUL, ISD::FMA, - ISD::FDIV, ISD::FSQRT, ISD::FABS, - ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, - ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, - ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, - ISD::FTRUNC, ISD::FRINT, ISD::FROUND, - ISD::FROUNDEVEN, ISD::SELECT}; - - setOperationAction(ZfhminPromoteOps, MVT::f16, Promote); + setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote); setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, MVT::f16, Legal); diff --git a/llvm/test/CodeGen/RISCV/bfloat-arith.ll b/llvm/test/CodeGen/RISCV/bfloat-arith.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bfloat-arith.ll @@ -0,0 +1,499 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s + +; These tests descend from float-arith.ll, where each function was targeted at +; a particular RISC-V FPU instruction. 
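+;
+; Zfbfmin only provides scalar bf16 conversion, load/store and move
+; instructions, so each arithmetic operation below is expected to be promoted
+; to f32 (see the shared ZfhminZfbfminPromoteOps table in RISCVISelLowering):
+; the operands are widened with fcvt.s.bf16, the operation is performed in
+; single precision, and the result is narrowed again with fcvt.bf16.s. For
+; example, a bf16 fadd should lower to roughly:
+;
+;   fcvt.s.bf16 fa5, fa1
+;   fcvt.s.bf16 fa4, fa0
+;   fadd.s      fa5, fa4, fa5
+;   fcvt.bf16.s fa0, fa5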
+ +define bfloat @fadd_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fadd_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fadd.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = fadd bfloat %a, %b + ret bfloat %1 +} + +define bfloat @fsub_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fsub_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fsub.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = fsub bfloat %a, %b + ret bfloat %1 +} + +define bfloat @fmul_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fmul_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fmul.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = fmul bfloat %a, %b + ret bfloat %1 +} + +define bfloat @fdiv_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fdiv_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fdiv.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = fdiv bfloat %a, %b + ret bfloat %1 +} + +declare bfloat @llvm.sqrt.bf16(bfloat) + +define bfloat @fsqrt_s(bfloat %a) nounwind { +; CHECK-LABEL: fsqrt_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fsqrt.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = call bfloat @llvm.sqrt.bf16(bfloat %a) + ret bfloat %1 +} + +declare bfloat @llvm.copysign.bf16(bfloat, bfloat) + +define bfloat @fsgnj_s(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: fsgnj_s: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: fsh fa1, 12(sp) +; RV32IZFBFMIN-NEXT: fsh fa0, 8(sp) +; RV32IZFBFMIN-NEXT: lbu a0, 13(sp) +; RV32IZFBFMIN-NEXT: lbu a1, 9(sp) +; RV32IZFBFMIN-NEXT: andi a0, a0, 128 +; RV32IZFBFMIN-NEXT: andi a1, a1, 127 +; RV32IZFBFMIN-NEXT: or a0, a1, a0 +; RV32IZFBFMIN-NEXT: sb a0, 9(sp) +; RV32IZFBFMIN-NEXT: flh fa0, 8(sp) +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: ret +; +; RV64IZFBFMIN-LABEL: fsgnj_s: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: fsh fa1, 8(sp) +; RV64IZFBFMIN-NEXT: fsh fa0, 0(sp) +; RV64IZFBFMIN-NEXT: lbu a0, 9(sp) +; RV64IZFBFMIN-NEXT: lbu a1, 1(sp) +; RV64IZFBFMIN-NEXT: andi a0, a0, 128 +; RV64IZFBFMIN-NEXT: andi a1, a1, 127 +; RV64IZFBFMIN-NEXT: or a0, a1, a0 +; RV64IZFBFMIN-NEXT: sb a0, 1(sp) +; RV64IZFBFMIN-NEXT: flh fa0, 0(sp) +; RV64IZFBFMIN-NEXT: addi sp, sp, 16 +; RV64IZFBFMIN-NEXT: ret + %1 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b) + ret bfloat %1 +} + +define i32 @fneg_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fneg_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fadd.s fa5, fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: feq.s a0, fa5, fa4 +; CHECK-NEXT: ret + %1 = fadd bfloat %a, %a + %2 = fneg bfloat %1 + %3 = fcmp oeq bfloat %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: fsgnjn_s: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 +; 
RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5 +; RV32IZFBFMIN-NEXT: fneg.s fa5, fa5 +; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 +; RV32IZFBFMIN-NEXT: fsh fa0, 8(sp) +; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp) +; RV32IZFBFMIN-NEXT: lbu a0, 9(sp) +; RV32IZFBFMIN-NEXT: lbu a1, 13(sp) +; RV32IZFBFMIN-NEXT: andi a0, a0, 127 +; RV32IZFBFMIN-NEXT: andi a1, a1, 128 +; RV32IZFBFMIN-NEXT: or a0, a0, a1 +; RV32IZFBFMIN-NEXT: sb a0, 9(sp) +; RV32IZFBFMIN-NEXT: flh fa0, 8(sp) +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: ret +; +; RV64IZFBFMIN-LABEL: fsgnjn_s: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 +; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5 +; RV64IZFBFMIN-NEXT: fneg.s fa5, fa5 +; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 +; RV64IZFBFMIN-NEXT: fsh fa0, 0(sp) +; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp) +; RV64IZFBFMIN-NEXT: lbu a0, 1(sp) +; RV64IZFBFMIN-NEXT: lbu a1, 9(sp) +; RV64IZFBFMIN-NEXT: andi a0, a0, 127 +; RV64IZFBFMIN-NEXT: andi a1, a1, 128 +; RV64IZFBFMIN-NEXT: or a0, a0, a1 +; RV64IZFBFMIN-NEXT: sb a0, 1(sp) +; RV64IZFBFMIN-NEXT: flh fa0, 0(sp) +; RV64IZFBFMIN-NEXT: addi sp, sp, 16 +; RV64IZFBFMIN-NEXT: ret + %1 = fadd bfloat %a, %b + %2 = fneg bfloat %1 + %3 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %2) + ret bfloat %3 +} + +declare bfloat @llvm.fabs.bf16(bfloat) + +define bfloat @fabs_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fabs_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fadd.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fabs.s fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fadd.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = fadd bfloat %a, %b + %2 = call bfloat @llvm.fabs.bf16(bfloat %1) + %3 = fadd bfloat %2, %1 + ret bfloat %3 +} + +declare bfloat @llvm.minnum.bf16(bfloat, bfloat) + +define bfloat @fmin_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fmin_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fmin.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = call bfloat @llvm.minnum.bf16(bfloat %a, bfloat %b) + ret bfloat %1 +} + +declare bfloat @llvm.maxnum.bf16(bfloat, bfloat) + +define bfloat @fmax_s(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fmax_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fmax.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = call bfloat @llvm.maxnum.bf16(bfloat %a, bfloat %b) + ret bfloat %1 +} + +declare bfloat @llvm.fma.bf16(bfloat, bfloat, bfloat) + +define bfloat @fmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fmadd_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa2 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) + ret bfloat %1 +} + +define bfloat @fmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fmsub_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa2 +; CHECK-NEXT: fmv.w.x fa4, zero +; 
CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %c_ = fadd bfloat 0.0, %c ; avoid negation using xor + %negc = fsub bfloat -0.0, %c_ + %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %negc) + ret bfloat %1 +} + +define bfloat @fnmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmadd_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fmv.w.x fa4, zero +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa2 +; CHECK-NEXT: fadd.s fa4, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fneg.s fa4, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 +; CHECK-NEXT: fmadd.s fa5, fa5, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %a_ = fadd bfloat 0.0, %a + %c_ = fadd bfloat 0.0, %c + %nega = fsub bfloat -0.0, %a_ + %negc = fsub bfloat -0.0, %c_ + %1 = call bfloat @llvm.fma.bf16(bfloat %nega, bfloat %b, bfloat %negc) + ret bfloat %1 +} + +define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmadd_s_2: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fmv.w.x fa4, zero +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa2 +; CHECK-NEXT: fadd.s fa4, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fneg.s fa4, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %b_ = fadd bfloat 0.0, %b + %c_ = fadd bfloat 0.0, %c + %negb = fsub bfloat -0.0, %b_ + %negc = fsub bfloat -0.0, %c_ + %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %negb, bfloat %negc) + ret bfloat %1 +} + +define bfloat @fnmadd_s_3(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmadd_s_3: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa2 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) + %neg = fneg bfloat %1 + ret bfloat %neg +} + + +define bfloat @fnmadd_nsz(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmadd_nsz: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa2 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = call nsz bfloat @llvm.fma.bf16(bfloat %a, 
bfloat %b, bfloat %c) + %neg = fneg nsz bfloat %1 + ret bfloat %neg +} + +define bfloat @fnmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmsub_s: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fmv.w.x fa4, zero +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 +; CHECK-NEXT: fmadd.s fa5, fa5, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %a_ = fadd bfloat 0.0, %a + %nega = fsub bfloat -0.0, %a_ + %1 = call bfloat @llvm.fma.bf16(bfloat %nega, bfloat %b, bfloat %c) + ret bfloat %1 +} + +define bfloat @fnmsub_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmsub_s_2: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fmv.w.x fa4, zero +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %b_ = fadd bfloat 0.0, %b + %negb = fsub bfloat -0.0, %b_ + %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %negb, bfloat %c) + ret bfloat %1 +} + +define bfloat @fmadd_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fmadd_s_contract: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fmul.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = fmul contract bfloat %a, %b + %2 = fadd contract bfloat %1, %c + ret bfloat %2 +} + +define bfloat @fmsub_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fmsub_s_contract: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa2 +; CHECK-NEXT: fmv.w.x fa4, zero +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmul.s fa4, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fsub.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %c_ = fadd bfloat 0.0, %c ; avoid negation using xor + %1 = fmul contract bfloat %a, %b + %2 = fsub contract bfloat %1, %c_ + ret bfloat %2 +} + +define bfloat @fnmadd_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmadd_s_contract: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fmv.w.x fa4, zero +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 +; CHECK-NEXT: fadd.s fa3, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa3, fa3 +; CHECK-NEXT: fcvt.s.bf16 fa2, fa2 +; CHECK-NEXT: fadd.s fa4, fa2, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa3 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fmul.s fa5, fa5, fa3 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fneg.s fa5, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: 
fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fsub.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %a_ = fadd bfloat 0.0, %a ; avoid negation using xor + %b_ = fadd bfloat 0.0, %b ; avoid negation using xor + %c_ = fadd bfloat 0.0, %c ; avoid negation using xor + %1 = fmul contract bfloat %a_, %b_ + %2 = fneg bfloat %1 + %3 = fsub contract bfloat %2, %c_ + ret bfloat %3 +} + +define bfloat @fnmsub_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fnmsub_s_contract: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fmv.w.x fa4, zero +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 +; CHECK-NEXT: fadd.s fa4, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fmul.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 +; CHECK-NEXT: fsub.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %a_ = fadd bfloat 0.0, %a ; avoid negation using xor + %b_ = fadd bfloat 0.0, %b ; avoid negation using xor + %1 = fmul contract bfloat %a_, %b_ + %2 = fsub contract bfloat %c, %1 + ret bfloat %2 +} diff --git a/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll b/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll @@ -0,0 +1,612 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFBFMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFBFMIN %s + +declare void @abort() +declare void @exit(i32) +declare bfloat @dummy(bfloat) + +define void @br_fcmp_false(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_false: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: li a0, 1 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB0_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.then +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB0_2: # %if.else +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_false: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: li a0, 1 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB0_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.then +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB0_2: # %if.else +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp false bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + ret void +if.else: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oeq(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_oeq: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: feq.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB1_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB1_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_oeq: +; RV64IZFBFMIN: # %bb.0: +; 
RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: feq.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB1_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB1_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp oeq bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oeq_alt(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_oeq_alt: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: feq.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB2_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB2_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_oeq_alt: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: feq.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB2_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB2_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp oeq bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ogt(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_ogt: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1 +; RV32IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB3_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB3_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_ogt: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1 +; RV64IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB3_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB3_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp ogt bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oge(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_oge: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1 +; RV32IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB4_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB4_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_oge: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, 
fa1 +; RV64IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB4_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB4_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp oge bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_olt(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_olt: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB5_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB5_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_olt: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB5_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB5_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp olt bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ole(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_ole: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB6_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB6_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_ole: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB6_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB6_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp ole bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_one(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_one: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: flt.s a1, fa5, fa4 +; RV32IZFBFMIN-NEXT: or a0, a1, a0 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB7_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB7_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_one: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; 
RV64IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: flt.s a1, fa5, fa4 +; RV64IZFBFMIN-NEXT: or a0, a1, a0 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB7_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB7_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp one bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ord(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_ord: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV32IZFBFMIN-NEXT: feq.s a1, fa5, fa5 +; RV32IZFBFMIN-NEXT: and a0, a1, a0 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB8_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB8_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_ord: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV64IZFBFMIN-NEXT: feq.s a1, fa5, fa5 +; RV64IZFBFMIN-NEXT: and a0, a1, a0 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB8_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB8_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp ord bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ueq(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_ueq: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: flt.s a1, fa5, fa4 +; RV32IZFBFMIN-NEXT: or a0, a1, a0 +; RV32IZFBFMIN-NEXT: beqz a0, .LBB9_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB9_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_ueq: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: flt.s a1, fa5, fa4 +; RV64IZFBFMIN-NEXT: or a0, a1, a0 +; RV64IZFBFMIN-NEXT: beqz a0, .LBB9_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB9_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp ueq bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ugt(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_ugt: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: beqz a0, .LBB10_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB10_2: # 
%if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_ugt: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: beqz a0, .LBB10_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB10_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp ugt bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uge(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_uge: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: beqz a0, .LBB11_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB11_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_uge: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: beqz a0, .LBB11_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB11_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp uge bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ult(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_ult: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1 +; RV32IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: beqz a0, .LBB12_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB12_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_ult: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1 +; RV64IZFBFMIN-NEXT: fle.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: beqz a0, .LBB12_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB12_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp ult bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ule(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_ule: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1 +; RV32IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: beqz a0, .LBB13_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB13_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_ule: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1 +; RV64IZFBFMIN-NEXT: flt.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: beqz a0, .LBB13_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB13_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp ule bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_une(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_une: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: feq.s a0, fa4, fa5 +; RV32IZFBFMIN-NEXT: beqz a0, .LBB14_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB14_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_une: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: feq.s a0, fa4, fa5 +; RV64IZFBFMIN-NEXT: beqz a0, .LBB14_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB14_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp une bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uno(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_uno: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV32IZFBFMIN-NEXT: feq.s a1, fa5, fa5 +; RV32IZFBFMIN-NEXT: and a0, a1, a0 +; RV32IZFBFMIN-NEXT: beqz a0, .LBB15_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB15_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_uno: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; RV64IZFBFMIN-NEXT: feq.s a1, fa5, fa5 +; RV64IZFBFMIN-NEXT: and a0, a1, a0 +; RV64IZFBFMIN-NEXT: beqz a0, .LBB15_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB15_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp uno bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_true(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: br_fcmp_true: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: li a0, 1 +; RV32IZFBFMIN-NEXT: bnez a0, .LBB16_2 +; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV32IZFBFMIN-NEXT: ret +; RV32IZFBFMIN-NEXT: .LBB16_2: # %if.then +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte 
Folded Spill +; RV32IZFBFMIN-NEXT: call abort@plt +; +; RV64IZFBFMIN-LABEL: br_fcmp_true: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: li a0, 1 +; RV64IZFBFMIN-NEXT: bnez a0, .LBB16_2 +; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else +; RV64IZFBFMIN-NEXT: ret +; RV64IZFBFMIN-NEXT: .LBB16_2: # %if.then +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: call abort@plt + %1 = fcmp true bfloat %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} diff --git a/llvm/test/CodeGen/RISCV/bfloat-fcmp.ll b/llvm/test/CodeGen/RISCV/bfloat-fcmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bfloat-fcmp.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck %s + +define i32 @fcmp_false(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_false: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret + %1 = fcmp false bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oeq(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_oeq: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: feq.s a0, fa4, fa5 +; CHECK-NEXT: ret + %1 = fcmp oeq bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: flt.s a0, fa4, fa5 +; CHECK-NEXT: ret + %1 = fcmp ogt bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fle.s a0, fa4, fa5 +; CHECK-NEXT: ret + %1 = fcmp oge bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: flt.s a0, fa4, fa5 +; CHECK-NEXT: ret + %1 = fcmp olt bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fle.s a0, fa4, fa5 +; CHECK-NEXT: ret + %1 = fcmp ole bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: flt.s a0, fa4, fa5 +; CHECK-NEXT: flt.s a1, fa5, fa4 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp one bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: feq.s a0, fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp ord bfloat %a, %b + %2 = zext i1 %1 to i32 + ret 
i32 %2 +} + +define i32 @fcmp_ueq(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_ueq: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: flt.s a0, fa4, fa5 +; CHECK-NEXT: flt.s a1, fa5, fa4 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp ueq bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fle.s a0, fa4, fa5 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp ugt bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: flt.s a0, fa4, fa5 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp uge bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fle.s a0, fa4, fa5 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp ult bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: flt.s a0, fa4, fa5 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp ule bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: feq.s a0, fa4, fa5 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp une bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: feq.s a0, fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp uno bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_true(bfloat %a, bfloat %b) nounwind { +; CHECK-LABEL: fcmp_true: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: ret + %1 = fcmp true bfloat %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/bfloat-frem.ll b/llvm/test/CodeGen/RISCV/bfloat-frem.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bfloat-frem.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFBFMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFBFMIN %s + +define bfloat @frem_bf16(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: frem_bf16: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa1, 
fa1 +; RV32IZFBFMIN-NEXT: call fmodf@plt +; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa0 +; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: ret +; +; RV64IZFBFMIN-LABEL: frem_bf16: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa1, fa1 +; RV64IZFBFMIN-NEXT: call fmodf@plt +; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa0 +; RV64IZFBFMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFBFMIN-NEXT: addi sp, sp, 16 +; RV64IZFBFMIN-NEXT: ret + %1 = frem bfloat %a, %b + ret bfloat %1 +} diff --git a/llvm/test/CodeGen/RISCV/bfloat-imm.ll b/llvm/test/CodeGen/RISCV/bfloat-imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bfloat-imm.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck %s + +define bfloat @bfloat_imm() nounwind { +; CHECK-LABEL: bfloat_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: ret + ret bfloat 3.0 +} + +define bfloat @bfloat_imm_op(bfloat %a) nounwind { +; CHECK-LABEL: bfloat_imm_op: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: lui a0, 260096 +; CHECK-NEXT: fmv.w.x fa4, a0 +; CHECK-NEXT: fadd.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = fadd bfloat %a, 1.0 + ret bfloat %1 +} + +define bfloat @bfloat_zero() nounwind { +; CHECK-LABEL: bfloat_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flh fa0, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: ret + ret bfloat 0.0 +} + +define bfloat @bfloat_negative_zero() nounwind { +; CHECK-LABEL: bfloat_negative_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flh fa0, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: ret + ret bfloat -0.0 +} diff --git a/llvm/test/CodeGen/RISCV/bfloat-isnan.ll b/llvm/test/CodeGen/RISCV/bfloat-isnan.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bfloat-isnan.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck %s + +define zeroext i1 @bfloat_is_nan(bfloat %a) nounwind { +; CHECK-LABEL: bfloat_is_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: feq.s a0, fa5, fa5 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = fcmp uno bfloat %a, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @bfloat_not_nan(bfloat %a) nounwind { +; CHECK-LABEL: bfloat_not_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: feq.s a0, fa5, fa5 +; CHECK-NEXT: ret + %1 = fcmp ord bfloat %a, 0.000000e+00 + ret i1 %1 +} diff --git a/llvm/test/CodeGen/RISCV/bfloat-mem.ll b/llvm/test/CodeGen/RISCV/bfloat-mem.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bfloat-mem.ll @@ -0,0 +1,182 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s + +define bfloat @flh(ptr %a) nounwind { +; CHECK-LABEL: flh: +; CHECK: # %bb.0: +; CHECK-NEXT: flh fa5, 6(a0) +; CHECK-NEXT: flh fa4, 0(a0) +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: fadd.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %1 = load bfloat, ptr %a + %2 = getelementptr bfloat, ptr %a, i32 3 + %3 = load bfloat, ptr %2 +; Use both loaded values in an FP op to ensure an flh is used, even for the +; soft bfloat ABI + %4 = fadd bfloat %1, %3 + ret bfloat %4 +} + +define dso_local void @fsh(ptr %a, bfloat %b, bfloat %c) nounwind { +; CHECK-LABEL: fsh: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fadd.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fsh fa5, 0(a0) +; CHECK-NEXT: fsh fa5, 16(a0) +; CHECK-NEXT: ret + %1 = fadd bfloat %b, %c + store bfloat %1, ptr %a + %2 = getelementptr bfloat, ptr %a, i32 8 + store bfloat %1, ptr %2 + ret void +} + +; Check load and store to a global +@G = dso_local global bfloat 0.0 + +define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind { +; Use %a and %b in an FP op to ensure bfloat precision floating point registers +; are used, even for the soft bfloat ABI +; CHECK-LABEL: flh_fsh_global: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 +; CHECK-NEXT: fadd.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: lui a0, %hi(G) +; CHECK-NEXT: flh fa5, %lo(G)(a0) +; CHECK-NEXT: addi a1, a0, %lo(G) +; CHECK-NEXT: fsh fa0, %lo(G)(a0) +; CHECK-NEXT: flh fa5, 18(a1) +; CHECK-NEXT: fsh fa0, 18(a1) +; CHECK-NEXT: ret + %1 = fadd bfloat %a, %b + %2 = load volatile bfloat, ptr @G + store bfloat %1, ptr @G + %3 = getelementptr bfloat, ptr @G, i32 9 + %4 = load volatile bfloat, ptr %3 + store bfloat %1, ptr %3 + ret bfloat %1 +} + +; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1 +define bfloat @flh_fsh_constant(bfloat %a) nounwind { +; RV32IZFBFMIN-LABEL: flh_fsh_constant: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: lui a0, 912092 +; RV32IZFBFMIN-NEXT: flh fa5, -273(a0) +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5 +; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 +; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5 +; RV32IZFBFMIN-NEXT: fsh fa0, -273(a0) +; RV32IZFBFMIN-NEXT: ret +; +; RV64IZFBFMIN-LABEL: flh_fsh_constant: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: lui a0, 228023 +; RV64IZFBFMIN-NEXT: slli a0, a0, 2 +; RV64IZFBFMIN-NEXT: flh fa5, -273(a0) +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5 +; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 +; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5 +; RV64IZFBFMIN-NEXT: fsh fa0, -273(a0) +; RV64IZFBFMIN-NEXT: ret + %1 = inttoptr i32 3735928559 to ptr + %2 = load volatile bfloat, ptr %1 + %3 = fadd bfloat %a, %2 + store bfloat %3, ptr %1 + ret bfloat %3 +} + +declare void @notdead(ptr) + +define bfloat @flh_stack(bfloat %a) nounwind { +; RV32IZFBFMIN-LABEL: flh_stack: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; 
RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fmv.s fs0, fa0 +; RV32IZFBFMIN-NEXT: addi a0, sp, 4 +; RV32IZFBFMIN-NEXT: call notdead@plt +; RV32IZFBFMIN-NEXT: flh fa5, 4(sp) +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fs0 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5 +; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4 +; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5 +; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: ret +; +; RV64IZFBFMIN-LABEL: flh_stack: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV64IZFBFMIN-NEXT: fmv.s fs0, fa0 +; RV64IZFBFMIN-NEXT: mv a0, sp +; RV64IZFBFMIN-NEXT: call notdead@plt +; RV64IZFBFMIN-NEXT: flh fa5, 0(sp) +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fs0 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5 +; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4 +; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5 +; RV64IZFBFMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV64IZFBFMIN-NEXT: addi sp, sp, 16 +; RV64IZFBFMIN-NEXT: ret + %1 = alloca bfloat, align 4 + call void @notdead(ptr %1) + %2 = load bfloat, ptr %1 + %3 = fadd bfloat %2, %a ; force load in to FPR16 + ret bfloat %3 +} + +define dso_local void @fsh_stack(bfloat %a, bfloat %b) nounwind { +; RV32IZFBFMIN-LABEL: fsh_stack: +; RV32IZFBFMIN: # %bb.0: +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 +; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 +; RV32IZFBFMIN-NEXT: fsh fa5, 8(sp) +; RV32IZFBFMIN-NEXT: addi a0, sp, 8 +; RV32IZFBFMIN-NEXT: call notdead@plt +; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 +; RV32IZFBFMIN-NEXT: ret +; +; RV64IZFBFMIN-LABEL: fsh_stack: +; RV64IZFBFMIN: # %bb.0: +; RV64IZFBFMIN-NEXT: addi sp, sp, -16 +; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 +; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 +; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 +; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 +; RV64IZFBFMIN-NEXT: fsh fa5, 4(sp) +; RV64IZFBFMIN-NEXT: addi a0, sp, 4 +; RV64IZFBFMIN-NEXT: call notdead@plt +; RV64IZFBFMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFBFMIN-NEXT: addi sp, sp, 16 +; RV64IZFBFMIN-NEXT: ret + %1 = fadd bfloat %a, %b ; force store from FPR16 + %2 = alloca bfloat, align 4 + store bfloat %1, ptr %2 + call void @notdead(ptr %2) + ret void +}
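A minimal sketch of how the check lines in these tests can be reproduced by hand, assuming an llc build with the experimental Zfbfmin extension enabled and on PATH; the invocations simply mirror the RUN and NOTE lines above, with paths relative to the LLVM source tree:

  llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -target-abi ilp32f \
      -verify-machineinstrs < llvm/test/CodeGen/RISCV/bfloat-arith.ll
  llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/RISCV/bfloat-arith.ll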