Review notes (text ahead of the first "Index:" header is ignored by patch(1)):
  * any_ffloor now selects rdn (0b010) and any_fceil selects rup (0b011) for
    f32/f64/f16; the two encodings were swapped.  The floor/ceil CHECK lines
    in the three new tests are updated to match.
  * Added the f32 any_froundeven pattern that the f64 and f16 groups already
    had; FPRndMode is made Legal for f32, so every op in that list needs one
    (presumably FPRndMode covers FROUNDEVEN -- confirm).
  * f16 FNEARBYINT is Legal only with Zfh + Zfa, matching the predicates on
    the fround.h pattern; with Zfhmin it keeps being promoted.
  * The f64 FPRndMode action is set once through if / else-if instead of
    being set twice; the resulting actions are unchanged.
  * NOTE(review): the template arguments of PatFprFpr, PatSetCC and
    MF.getInfo<> had been stripped from the posted text and are reconstructed
    here, as are line breaks and indentation.  Confirm against the working
    tree before applying.

Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -342,7 +342,8 @@
   if (Subtarget.hasStdExtZfhOrZfhmin()) {
     if (Subtarget.hasStdExtZfh()) {
       setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
-      setOperationAction(FPRndMode, MVT::f16, Custom);
+      setOperationAction(FPRndMode, MVT::f16,
+                         Subtarget.hasStdExtZfa() ? Legal : Custom);
       setOperationAction(ISD::SELECT, MVT::f16, Custom);
     } else {
       static const unsigned ZfhminPromoteOps[] = {
@@ -371,7 +372,13 @@
     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
 
-    setOperationAction({ISD::FREM, ISD::FNEARBYINT, ISD::FPOW, ISD::FPOWI,
+    // The Zfa fround.h pattern is predicated on Zfh as well, so with only
+    // Zfhmin FNEARBYINT must keep being promoted to f32.
+    setOperationAction(ISD::FNEARBYINT, MVT::f16,
+                       Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa()
+                           ? Legal
+                           : Promote);
+    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                         ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                         ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
                        MVT::f16, Promote);
@@ -391,7 +398,8 @@
 
   if (Subtarget.hasStdExtF()) {
     setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
-    setOperationAction(FPRndMode, MVT::f32, Custom);
+    setOperationAction(FPRndMode, MVT::f32,
+                       Subtarget.hasStdExtZfa() ? Legal : Custom);
     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
     setOperationAction(ISD::SELECT, MVT::f32, Custom);
@@ -399,6 +407,9 @@
     setOperationAction(FPOpToExpand, MVT::f32, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
+    if (Subtarget.hasStdExtZfa())
+      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
   }
 
   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
@@ -406,9 +417,14 @@
 
   if (Subtarget.hasStdExtD()) {
     setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
-    if (Subtarget.is64Bit()) {
+    if (Subtarget.hasStdExtZfa()) {
+      setOperationAction(FPRndMode, MVT::f64, Legal);
+      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
+    } else if (Subtarget.is64Bit()) {
       setOperationAction(FPRndMode, MVT::f64, Custom);
     }
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
@@ -3804,6 +3820,16 @@
         DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
     return FPConv;
   }
+  if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
+      Subtarget.hasStdExtZfa()) {
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op0,
+                             DAG.getConstant(0, DL, MVT::i32));
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op0,
+                             DAG.getConstant(1, DL, MVT::i32));
+    SDValue RetReg =
+        DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+    return RetReg;
+  }
 
   // Consider other scalar<->scalar casts as legal if the types are legal.
   // Otherwise expand them.
@@ -8012,6 +8038,14 @@
       SDValue FPConv =
           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
+    } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
+               Subtarget.hasStdExtZfa()) {
+      SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
+                                   DAG.getVTList(MVT::i32, MVT::i32), Op0);
+      SDValue Lo = NewReg.getValue(0);
+      SDValue Hi = NewReg.getValue(1);
+      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+      Results.push_back(RetReg);
     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
                isTypeLegal(Op0VT)) {
       // Custom-legalize bitcasts from fixed-length vector types to illegal
@@ -11116,7 +11150,8 @@
 }
 
 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
-                                             MachineBasicBlock *BB) {
+                                             MachineBasicBlock *BB,
+                                             const RISCVSubtarget &Subtarget) {
   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
 
   MachineFunction &MF = *BB->getParent();
@@ -11126,6 +11161,13 @@
   Register LoReg = MI.getOperand(0).getReg();
   Register HiReg = MI.getOperand(1).getReg();
   Register SrcReg = MI.getOperand(2).getReg();
+  if (Subtarget.hasStdExtD() && Subtarget.hasStdExtZfa() && !Subtarget.is64Bit()) {
+    BuildMI(*BB, MI, DL, TII.get(RISCV::FMV_X_W_FPR64), LoReg).addReg(SrcReg);
+    BuildMI(*BB, MI, DL, TII.get(RISCV::FMVH_X_D), HiReg).addReg(SrcReg);
+    MI.eraseFromParent(); // The pseudo instruction is gone now.
+    return BB;
+  }
+
   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
 
@@ -11149,7 +11191,8 @@
 }
 
 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
-                                                 MachineBasicBlock *BB) {
+                                                 MachineBasicBlock *BB,
+                                                 const RISCVSubtarget &Subtarget) {
   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
          "Unexpected instruction");
 
@@ -11160,6 +11203,14 @@
   Register DstReg = MI.getOperand(0).getReg();
   Register LoReg = MI.getOperand(1).getReg();
   Register HiReg = MI.getOperand(2).getReg();
+  if (Subtarget.hasStdExtD() && Subtarget.hasStdExtZfa() && !Subtarget.is64Bit()) {
+    BuildMI(*BB, MI, DL, TII.get(RISCV::FMVP_D_X), DstReg)
+        .addReg(LoReg)
+        .addReg(HiReg);
+    MI.eraseFromParent();
+    return BB;
+  }
+
   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
 
@@ -11675,9 +11726,9 @@
   case RISCV::Select_FPR64_Using_CC_GPR:
     return emitSelectPseudo(MI, BB, Subtarget);
   case RISCV::BuildPairF64Pseudo:
-    return emitBuildPairF64Pseudo(MI, BB);
+    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
   case RISCV::SplitF64Pseudo:
-    return emitSplitF64Pseudo(MI, BB);
+    return emitSplitF64Pseudo(MI, BB, Subtarget);
   case RISCV::PseudoQuietFLE_H:
     return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
   case RISCV::PseudoQuietFLT_H:
Index: llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -131,6 +131,10 @@
                  Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
 def FMVP_D_X : FPBinaryOp_rr<0b1011001, 0b000, FPR64, GPR, "fmvp.d.x">,
                Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>;
+let isCodeGenOnly = 1, mayRaiseFPException = 0 in {
+  def FMV_X_W_FPR64 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR64, "fmv.x.w">,
+                      Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
+}
 } // Predicates = [HasStdExtZfa, HasStdExtD, IsRV32]
 
 let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
@@ -147,3 +151,79 @@
 def FLTQ_H : FPCmp_rr<0b1010010, 0b101, "fltq.h", FPR16, /*Commutable*/ 1>;
 def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16, /*Commutable*/ 1>;
 } // Predicates = [HasStdExtZfa, HasStdExtZfh]
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+// Rounding-mode operand encodings used below (frm field):
+//   0b000 rne, 0b001 rtz, 0b010 rdn, 0b011 rup, 0b100 rmm, 0b111 dyn.
+
+let Predicates = [HasStdExtZfa] in {
+def: PatFprFpr<fminimum, FMINM_S, FPR32>;
+def: PatFprFpr<fmaximum, FMAXM_S, FPR32>;
+
+// frint rounds according to the current rounding mode and detects
+// inexact conditions.
+def: Pat<(any_frint FPR32:$rs1), (FROUNDNX_S FPR32:$rs1, 0b111)>;
+
+// fnearbyint is like frint but does not detect inexact conditions.
+def: Pat<(any_fnearbyint FPR32:$rs1), (FROUND_S FPR32:$rs1, 0b111)>;
+
+def: Pat<(any_fround FPR32:$rs1), (FROUND_S FPR32:$rs1, 0b100)>;
+def: Pat<(any_froundeven FPR32:$rs1), (FROUND_S FPR32:$rs1, 0b000)>;
+def: Pat<(any_ffloor FPR32:$rs1), (FROUND_S FPR32:$rs1, 0b010)>;
+def: Pat<(any_fceil FPR32:$rs1), (FROUND_S FPR32:$rs1, 0b011)>;
+def: Pat<(any_ftrunc FPR32:$rs1), (FROUND_S FPR32:$rs1, 0b001)>;
+
+def: PatSetCC<FPR32, strict_fsetcc, SETLT, FLTQ_S>;
+def: PatSetCC<FPR32, strict_fsetcc, SETOLT, FLTQ_S>;
+def: PatSetCC<FPR32, strict_fsetcc, SETLE, FLEQ_S>;
+def: PatSetCC<FPR32, strict_fsetcc, SETOLE, FLEQ_S>;
+} // Predicates = [HasStdExtZfa]
+
+let Predicates = [HasStdExtZfa, HasStdExtD] in {
+def: PatFprFpr<fminimum, FMINM_D, FPR64>;
+def: PatFprFpr<fmaximum, FMAXM_D, FPR64>;
+
+// frint rounds according to the current rounding mode and detects
+// inexact conditions.
+def: Pat<(any_frint FPR64:$rs1), (FROUNDNX_D FPR64:$rs1, 0b111)>;
+
+// fnearbyint is like frint but does not detect inexact conditions.
+def: Pat<(any_fnearbyint FPR64:$rs1), (FROUND_D FPR64:$rs1, 0b111)>;
+
+def: Pat<(any_fround FPR64:$rs1), (FROUND_D FPR64:$rs1, 0b100)>;
+def: Pat<(any_froundeven FPR64:$rs1), (FROUND_D FPR64:$rs1, 0b000)>;
+def: Pat<(any_ffloor FPR64:$rs1), (FROUND_D FPR64:$rs1, 0b010)>;
+def: Pat<(any_fceil FPR64:$rs1), (FROUND_D FPR64:$rs1, 0b011)>;
+def: Pat<(any_ftrunc FPR64:$rs1), (FROUND_D FPR64:$rs1, 0b001)>;
+
+def: PatSetCC<FPR64, strict_fsetcc, SETLT, FLTQ_D>;
+def: PatSetCC<FPR64, strict_fsetcc, SETOLT, FLTQ_D>;
+def: PatSetCC<FPR64, strict_fsetcc, SETLE, FLEQ_D>;
+def: PatSetCC<FPR64, strict_fsetcc, SETOLE, FLEQ_D>;
+} // Predicates = [HasStdExtZfa, HasStdExtD]
+
+let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
+def: PatFprFpr<fminimum, FMINM_H, FPR16>;
+def: PatFprFpr<fmaximum, FMAXM_H, FPR16>;
+
+// frint rounds according to the current rounding mode and detects
+// inexact conditions.
+def: Pat<(any_frint FPR16:$rs1), (FROUNDNX_H FPR16:$rs1, 0b111)>;
+
+// fnearbyint is like frint but does not detect inexact conditions.
+def: Pat<(any_fnearbyint FPR16:$rs1), (FROUND_H FPR16:$rs1, 0b111)>;
+
+def: Pat<(any_fround FPR16:$rs1), (FROUND_H FPR16:$rs1, 0b100)>;
+def: Pat<(any_froundeven FPR16:$rs1), (FROUND_H FPR16:$rs1, 0b000)>;
+def: Pat<(any_ffloor FPR16:$rs1), (FROUND_H FPR16:$rs1, 0b010)>;
+def: Pat<(any_fceil FPR16:$rs1), (FROUND_H FPR16:$rs1, 0b011)>;
+def: Pat<(any_ftrunc FPR16:$rs1), (FROUND_H FPR16:$rs1, 0b001)>;
+
+def: PatSetCC<FPR16, strict_fsetcc, SETLT, FLTQ_H>;
+def: PatSetCC<FPR16, strict_fsetcc, SETOLT, FLTQ_H>;
+def: PatSetCC<FPR16, strict_fsetcc, SETLE, FLEQ_H>;
+def: PatSetCC<FPR16, strict_fsetcc, SETOLE, FLEQ_H>;
+} // Predicates = [HasStdExtZfa, HasStdExtZfh]
Index: llvm/test/CodeGen/RISCV/double-zfa.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/double-zfa.ll
@@ -0,0 +1,199 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+experimental-zfa,+d < %s \
+; RUN:     | FileCheck --check-prefix=RV32IDZFA %s
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+experimental-zfa,+d < %s \
+; RUN:     | FileCheck --check-prefix=RV64DZFA %s
+
+declare double @llvm.minimum.f64(double, double)
+
+define double @fminm_d(double %a, double %b) nounwind {
+; RV32IDZFA-LABEL: fminm_d:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fminm.d fa0, fa0, fa1
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fminm_d:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fminm.d fa0, fa0, fa1
+; RV64DZFA-NEXT:    ret
+  %1 = call double @llvm.minimum.f64(double %a, double %b)
+  ret double %1
+}
+
+declare double @llvm.maximum.f64(double, double)
+
+define double @fmaxm_d(double %a, double %b) nounwind {
+; RV32IDZFA-LABEL: fmaxm_d:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fmaxm.d fa0, fa0, fa1
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fmaxm_d:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fmaxm.d fa0, fa0, fa1
+; RV64DZFA-NEXT:    ret
+  %1 = call double @llvm.maximum.f64(double %a, double %b)
+  ret double %1
+}
+
+define double @fround_s_1(double %a) nounwind {
+; RV32IDZFA-LABEL: fround_s_1:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fround.d fa0, fa0, rmm
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fround_s_1:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fround.d fa0, fa0, rmm
+; RV64DZFA-NEXT:    ret
+  %call = tail call double @round(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @round(double) nounwind readnone
+
+
+define double @fround_s_2(double %a) nounwind {
+; RV32IDZFA-LABEL: fround_s_2:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fround.d fa0, fa0, rdn
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fround_s_2:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fround.d fa0, fa0, rdn
+; RV64DZFA-NEXT:    ret
+  %call = tail call double @floor(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @floor(double) nounwind readnone
+
+
+define double @fround_s_3(double %a) nounwind {
+; RV32IDZFA-LABEL: fround_s_3:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fround.d fa0, fa0, rup
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fround_s_3:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fround.d fa0, fa0, rup
+; RV64DZFA-NEXT:    ret
+  %call = tail call double @ceil(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @ceil(double) nounwind readnone
+
+
+define double @fround_s_4(double %a) nounwind {
+; RV32IDZFA-LABEL: fround_s_4:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fround.d fa0, fa0, rtz
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fround_s_4:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fround.d fa0, fa0, rtz
+; RV64DZFA-NEXT:    ret
+  %call = tail call double @trunc(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @trunc(double) nounwind readnone
+
+
+define double @fround_s_5(double %a) nounwind {
+; RV32IDZFA-LABEL: fround_s_5:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fround.d fa0, fa0
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fround_s_5:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fround.d fa0, fa0
+; RV64DZFA-NEXT:    ret
+  %call = tail call double @nearbyint(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+
+define double @froundnx_s(double %a) nounwind {
+; RV32IDZFA-LABEL: froundnx_s:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    froundnx.d fa0, fa0
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: froundnx_s:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    froundnx.d fa0, fa0
+; RV64DZFA-NEXT:    ret
+  %call = tail call double @rint(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @rint(double) nounwind readnone
+
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+
+define i32 @fcmp_olt_q(double %a, double %b) nounwind strictfp {
+; RV32IDZFA-LABEL: fcmp_olt_q:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fltq.d a0, fa0, fa1
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fcmp_olt_q:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fltq.d a0, fa0, fa1
+; RV64DZFA-NEXT:    ret
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") strictfp
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @fcmp_ole_q(double %a, double %b) nounwind strictfp {
+; RV32IDZFA-LABEL: fcmp_ole_q:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fleq.d a0, fa0, fa1
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fcmp_ole_q:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fleq.d a0, fa0, fa1
+; RV64DZFA-NEXT:    ret
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") strictfp
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i64 @fmvh_x_d(double %fa) {
+; RV32IDZFA-LABEL: fmvh_x_d:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fmv.x.w a0, fa0
+; RV32IDZFA-NEXT:    fmvh.x.d a1, fa0
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fmvh_x_d:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fmv.x.d a0, fa0
+; RV64DZFA-NEXT:    ret
+  %i = bitcast double %fa to i64
+  ret i64 %i
+}
+
+define double @fmvp_d_x(i64 %a) {
+; RV32IDZFA-LABEL: fmvp_d_x:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    fmvp.d.x fa0, a0, a1
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: fmvp_d_x:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    fmv.d.x fa0, a0
+; RV64DZFA-NEXT:    ret
+  %or = bitcast i64 %a to double
+  ret double %or
+}
Index: llvm/test/CodeGen/RISCV/float-zfa.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/float-zfa.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi ilp32f -mattr=+experimental-zfa < %s \
+; RUN:     | FileCheck --check-prefix=RV32IZFA %s
+; RUN: llc -mtriple=riscv64 -target-abi lp64f -mattr=+experimental-zfa < %s \
+; RUN:     | FileCheck --check-prefix=RV64IZFA %s
+
+declare float @llvm.minimum.f32(float, float)
+
+define float @fminm_s(float %a, float %b) nounwind {
+; RV32IZFA-LABEL: fminm_s:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fminm.s fa0, fa0, fa1
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fminm_s:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fminm.s fa0, fa0, fa1
+; RV64IZFA-NEXT:    ret
+  %1 = call float @llvm.minimum.f32(float %a, float %b)
+  ret float %1
+}
+
+declare float @llvm.maximum.f32(float, float)
+
+define float @fmaxm_s(float %a, float %b) nounwind {
+; RV32IZFA-LABEL: fmaxm_s:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fmaxm.s fa0, fa0, fa1
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fmaxm_s:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fmaxm.s fa0, fa0, fa1
+; RV64IZFA-NEXT:    ret
+  %1 = call float @llvm.maximum.f32(float %a, float %b)
+  ret float %1
+}
+
+
+define float @fround_s_1(float %a) nounwind {
+; RV32IZFA-LABEL: fround_s_1:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fround.s fa0, fa0, rmm
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fround_s_1:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fround.s fa0, fa0, rmm
+; RV64IZFA-NEXT:    ret
+  %call = tail call float @roundf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @roundf(float) nounwind readnone
+
+
+define float @fround_s_2(float %a) nounwind {
+; RV32IZFA-LABEL: fround_s_2:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fround.s fa0, fa0, rdn
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fround_s_2:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fround.s fa0, fa0, rdn
+; RV64IZFA-NEXT:    ret
+  %call = tail call float @floorf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @floorf(float) nounwind readnone
+
+
+define float @fround_s_3(float %a) nounwind {
+; RV32IZFA-LABEL: fround_s_3:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fround.s fa0, fa0, rup
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fround_s_3:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fround.s fa0, fa0, rup
+; RV64IZFA-NEXT:    ret
+  %call = tail call float @ceilf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @ceilf(float) nounwind readnone
+
+
+define float @fround_s_4(float %a) nounwind {
+; RV32IZFA-LABEL: fround_s_4:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fround.s fa0, fa0, rtz
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fround_s_4:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fround.s fa0, fa0, rtz
+; RV64IZFA-NEXT:    ret
+  %call = tail call float @truncf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @truncf(float) nounwind readnone
+
+
+define float @fround_s_5(float %a) nounwind {
+; RV32IZFA-LABEL: fround_s_5:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fround.s fa0, fa0
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fround_s_5:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fround.s fa0, fa0
+; RV64IZFA-NEXT:    ret
+  %call = tail call float @nearbyintf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+
+define float @froundnx_s(float %a) nounwind {
+; RV32IZFA-LABEL: froundnx_s:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    froundnx.s fa0, fa0
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: froundnx_s:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    froundnx.s fa0, fa0
+; RV64IZFA-NEXT:    ret
+  %call = tail call float @rintf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @rintf(float) nounwind readnone
+
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+
+define i32 @fcmp_olt_q(float %a, float %b) nounwind strictfp {
+; RV32IZFA-LABEL: fcmp_olt_q:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fltq.s a0, fa0, fa1
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fcmp_olt_q:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fltq.s a0, fa0, fa1
+; RV64IZFA-NEXT:    ret
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") strictfp
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @fcmp_ole_q(float %a, float %b) nounwind strictfp {
+; RV32IZFA-LABEL: fcmp_ole_q:
+; RV32IZFA:       # %bb.0:
+; RV32IZFA-NEXT:    fleq.s a0, fa0, fa1
+; RV32IZFA-NEXT:    ret
+;
+; RV64IZFA-LABEL: fcmp_ole_q:
+; RV64IZFA:       # %bb.0:
+; RV64IZFA-NEXT:    fleq.s a0, fa0, fa1
+; RV64IZFA-NEXT:    ret
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") strictfp
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
Index: llvm/test/CodeGen/RISCV/half-zfa.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/half-zfa.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi ilp32f -mattr=+experimental-zfa,+zfh < %s \
+; RUN:     | FileCheck --check-prefix=RV32IHZFA %s
+; RUN: llc -mtriple=riscv64 -target-abi lp64f -mattr=+experimental-zfa,+zfh < %s \
+; RUN:     | FileCheck --check-prefix=RV64HZFA %s
+
+declare half @llvm.minimum.f16(half, half)
+
+define half @fminm_d(half %a, half %b) nounwind {
+; RV32IHZFA-LABEL: fminm_d:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fminm.h fa0, fa0, fa1
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fminm_d:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fminm.h fa0, fa0, fa1
+; RV64HZFA-NEXT:    ret
+  %1 = call half @llvm.minimum.f16(half %a, half %b)
+  ret half %1
+}
+
+declare half @llvm.maximum.f16(half, half)
+
+define half @fmaxm_d(half %a, half %b) nounwind {
+; RV32IHZFA-LABEL: fmaxm_d:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fmaxm.h fa0, fa0, fa1
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fmaxm_d:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fmaxm.h fa0, fa0, fa1
+; RV64HZFA-NEXT:    ret
+  %1 = tail call half @llvm.maximum.f16(half %a, half %b)
+  ret half %1
+}
+
+define half @fround_s_1(half %a) nounwind {
+; RV32IHZFA-LABEL: fround_s_1:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fround.h fa0, fa0, rmm
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fround_s_1:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fround.h fa0, fa0, rmm
+; RV64HZFA-NEXT:    ret
+  %call = tail call half @llvm.round.f16(half %a) nounwind readnone
+  ret half %call
+}
+
+declare half @llvm.round.f16(half) nounwind readnone
+
+
+define half @fround_s_2(half %a) nounwind {
+; RV32IHZFA-LABEL: fround_s_2:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fround.h fa0, fa0, rdn
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fround_s_2:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fround.h fa0, fa0, rdn
+; RV64HZFA-NEXT:    ret
+  %call = tail call half @llvm.floor.f16(half %a) nounwind readnone
+  ret half %call
+}
+
+declare half @llvm.floor.f16(half) nounwind readnone
+
+
+define half @fround_s_3(half %a) nounwind {
+; RV32IHZFA-LABEL: fround_s_3:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fround.h fa0, fa0, rup
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fround_s_3:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fround.h fa0, fa0, rup
+; RV64HZFA-NEXT:    ret
+  %call = tail call half @llvm.ceil.f16(half %a) nounwind readnone
+  ret half %call
+}
+
+declare half @llvm.ceil.f16(half) nounwind readnone
+
+
+define half @fround_s_4(half %a) nounwind {
+; RV32IHZFA-LABEL: fround_s_4:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fround.h fa0, fa0, rtz
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fround_s_4:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fround.h fa0, fa0, rtz
+; RV64HZFA-NEXT:    ret
+  %call = tail call half @llvm.trunc.f16(half %a) nounwind readnone
+  ret half %call
+}
+
+declare half @llvm.trunc.f16(half) nounwind readnone
+
+
+define half @fround_s_5(half %a) nounwind {
+; RV32IHZFA-LABEL: fround_s_5:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fround.h fa0, fa0
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fround_s_5:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fround.h fa0, fa0
+; RV64HZFA-NEXT:    ret
+  %call = tail call half @llvm.nearbyint.f16(half %a) nounwind readnone
+  ret half %call
+}
+
+declare half @llvm.nearbyint.f16(half) nounwind readnone
+
+
+define half @froundnx_s(half %a) nounwind {
+; RV32IHZFA-LABEL: froundnx_s:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    froundnx.h fa0, fa0
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: froundnx_s:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    froundnx.h fa0, fa0
+; RV64HZFA-NEXT:    ret
+  %call = tail call half @llvm.rint.f16(half %a) nounwind readnone
+  ret half %call
+}
+
+declare half @llvm.rint.f16(half) nounwind readnone
+
+declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
+
+define i32 @fcmp_olt_q(half %a, half %b) nounwind strictfp {
+; RV32IHZFA-LABEL: fcmp_olt_q:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fltq.h a0, fa0, fa1
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fcmp_olt_q:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fltq.h a0, fa0, fa1
+; RV64HZFA-NEXT:    ret
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") strictfp
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @fcmp_ole_q(half %a, half %b) nounwind strictfp {
+; RV32IHZFA-LABEL: fcmp_ole_q:
+; RV32IHZFA:       # %bb.0:
+; RV32IHZFA-NEXT:    fleq.h a0, fa0, fa1
+; RV32IHZFA-NEXT:    ret
+;
+; RV64HZFA-LABEL: fcmp_ole_q:
+; RV64HZFA:       # %bb.0:
+; RV64HZFA-NEXT:    fleq.h a0, fa0, fa1
+; RV64HZFA-NEXT:    ret
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") strictfp
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}