diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -80,6 +80,7 @@ def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">, AssemblerPredicate<(all_of FeatureStdExtZfh), "'Zfh' (Half-Precision Floating-Point)">; +def NoStdExtZfh : Predicate<"!Subtarget->hasStdExtZfh()">; def HasStdExtZfhOrZfhmin : Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZfhmin()">, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -100,7 +100,7 @@ // Set up the register classes. addRegisterClass(XLenVT, &RISCV::GPRRegClass); - if (Subtarget.hasStdExtZfh()) + if (Subtarget.hasStdExtZfhOrZfhmin()) addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); if (Subtarget.hasStdExtF()) addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); @@ -319,22 +319,44 @@ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16}; - if (Subtarget.hasStdExtZfh()) + if (Subtarget.hasStdExtZfhOrZfhmin()) setOperationAction(ISD::BITCAST, MVT::i16, Custom); - if (Subtarget.hasStdExtZfh()) { - setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); - setOperationAction(ISD::FCEIL, MVT::f16, Custom); - setOperationAction(ISD::FFLOOR, MVT::f16, Custom); - setOperationAction(ISD::FTRUNC, MVT::f16, Custom); - setOperationAction(ISD::FRINT, MVT::f16, Custom); - setOperationAction(ISD::FROUND, MVT::f16, Custom); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Custom); + if (Subtarget.hasStdExtZfhOrZfhmin()) { + if (Subtarget.hasStdExtZfh()) { + setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); + setOperationAction(ISD::FCEIL, MVT::f16, Custom); + setOperationAction(ISD::FFLOOR, MVT::f16, Custom); + setOperationAction(ISD::FTRUNC, MVT::f16, Custom); + setOperationAction(ISD::FRINT, MVT::f16, Custom); + setOperationAction(ISD::FROUND, MVT::f16, Custom); + setOperationAction(ISD::FROUNDEVEN, MVT::f16, Custom); + setOperationAction(ISD::SELECT, MVT::f16, Custom); + } else { + static const unsigned ZfhminPromoteOps[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, + ISD::FSUB, ISD::FMUL, ISD::FMA, + ISD::FDIV, ISD::FSQRT, ISD::FABS, + ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, + ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, + ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, + ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, + ISD::FTRUNC, ISD::FRINT, ISD::FROUND, + ISD::FROUNDEVEN, ISD::SELECT}; + + setOperationAction(ZfhminPromoteOps, MVT::f16, Promote); + setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, + ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, + MVT::f16, Legal); + // FIXME: Need to promote f16 FCOPYSIGN to f32, but the + // DAGCombiner::visitFP_ROUND probably needs improvements first. + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); + } + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setCondCodeAction(FPCCToExpand, MVT::f16, Expand); setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); - setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f16, Expand); setOperationAction({ISD::FREM, ISD::FNEARBYINT, ISD::FPOW, ISD::FPOWI, @@ -958,7 +980,7 @@ // Custom-legalize bitcasts from fixed-length vectors to scalar types. 
setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, Custom); - if (Subtarget.hasStdExtZfh()) + if (Subtarget.hasStdExtZfhOrZfhmin()) setOperationAction(ISD::BITCAST, MVT::f16, Custom); if (Subtarget.hasStdExtF()) setOperationAction(ISD::BITCAST, MVT::f32, Custom); @@ -1003,7 +1025,7 @@ if (Subtarget.hasStdExtZbkb()) setTargetDAGCombine(ISD::BITREVERSE); - if (Subtarget.hasStdExtZfh()) + if (Subtarget.hasStdExtZfhOrZfhmin()) setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); if (Subtarget.hasStdExtF()) setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, @@ -1433,8 +1455,7 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { - // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin. - if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) + if (VT == MVT::f16 && !Subtarget.hasStdExtZfhOrZfhmin()) return false; if (VT == MVT::f32 && !Subtarget.hasStdExtF()) return false; @@ -1477,7 +1498,7 @@ } bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { - return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || + return (VT == MVT::f16 && Subtarget.hasStdExtZfhOrZfhmin()) || (VT == MVT::f32 && Subtarget.hasStdExtF()) || (VT == MVT::f64 && Subtarget.hasStdExtD()); } @@ -1485,10 +1506,10 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // Use f32 to pass f16 if it is legal and Zfh is not enabled. + // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. // We might still end up using a GPR but that will be decided based on ABI. - // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin. - if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) + if (VT == MVT::f16 && Subtarget.hasStdExtF() && + !Subtarget.hasStdExtZfhOrZfhmin()) return MVT::f32; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); @@ -1497,10 +1518,10 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // Use f32 to pass f16 if it is legal and Zfh is not enabled. + // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. // We might still end up using a GPR but that will be decided based on ABI. - // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin. 
- if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) + if (VT == MVT::f16 && Subtarget.hasStdExtF() && + !Subtarget.hasStdExtZfhOrZfhmin()) return 1; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); @@ -3583,7 +3604,8 @@ SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); MVT XLenVT = Subtarget.getXLenVT(); - if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) { + if (VT == MVT::f16 && Op0VT == MVT::i16 && + Subtarget.hasStdExtZfhOrZfhmin()) { SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); return FPConv; @@ -7741,7 +7763,8 @@ SDValue Op0 = N->getOperand(0); EVT Op0VT = Op0.getValueType(); MVT XLenVT = Subtarget.getXLenVT(); - if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) { + if (VT == MVT::i16 && Op0VT == MVT::f16 && + Subtarget.hasStdExtZfhOrZfhmin()) { SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && @@ -12772,7 +12795,7 @@ break; return std::make_pair(0U, &RISCV::GPRRegClass); case 'f': - if (Subtarget.hasStdExtZfh() && VT == MVT::f16) + if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) return std::make_pair(0U, &RISCV::FPR16RegClass); if (Subtarget.hasStdExtF() && VT == MVT::f32) return std::make_pair(0U, &RISCV::FPR32RegClass); @@ -12885,7 +12908,7 @@ } if (VT == MVT::f32 || VT == MVT::Other) return std::make_pair(FReg, &RISCV::FPR32RegClass); - if (Subtarget.hasStdExtZfh() && VT == MVT::f16) { + if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) { unsigned RegNo = FReg - RISCV::F0_F; unsigned HReg = RISCV::F0_H + RegNo; return std::make_pair(HReg, &RISCV::FPR16RegClass); @@ -13196,7 +13219,7 @@ switch (FPVT.getSimpleVT().SimpleTy) { case MVT::f16: - return Subtarget.hasStdExtZfh(); + return Subtarget.hasStdExtZfhOrZfhmin(); case MVT::f32: return Subtarget.hasStdExtF(); case MVT::f64: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -282,7 +282,17 @@ RISCVII::VLMUL LMul = RISCVII::LMUL_1; unsigned SubRegIdx = RISCV::sub_vrm1_0; if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::FSGNJ_H; + if (!STI.hasStdExtZfh() && STI.hasStdExtZfhmin()) { + // The Zfhmin subset doesn't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16, + &RISCV::FPR32RegClass); + SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16, + &RISCV::FPR32RegClass); + Opc = RISCV::FSGNJ_S; + } else { + Opc = RISCV::FSGNJ_H; + } IsScalableVector = false; } else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) { Opc = RISCV::FSGNJ_S; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -253,8 +253,7 @@ let Predicates = [HasStdExtZfh] in { -/// Float constants -def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>; +// Floating point constant -0.0 def : Pat<(f16 (fpimmneg0)), (FSGNJN_H (FMV_H_X X0), (FMV_H_X X0))>; /// Float conversion operations @@ -350,6 +349,9 @@ defm : StPat; +/// Floating point constant +0.0 +def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>; + /// Float conversion operations // f32 -> f16, f16 -> f32 @@ -423,3 +425,57 @@ (FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>; def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>; } // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] + +let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in { +// Floating point constant -0.0 +def : Pat<(f16 (fpimmneg0)), (FCVT_H_S (FSGNJN_S (FMV_W_X X0), (FMV_W_X X0)), 0b111)>; +} // Predicates = [HasStdExtZfhmin, NoStdExtZfh] + +let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] in { +def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>; + +// half->[u]int. Round-to-zero must be used. +def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_S (FCVT_S_H $rs1), 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_S (FCVT_S_H $rs1), 0b001)>; + +// half->int32 with current rounding mode. +def : Pat<(i32 (any_lrint FPR16:$rs1)), (FCVT_W_S (FCVT_S_H $rs1), 0b111)>; + +// half->int32 rounded to nearest with ties rounded away from zero. +def : Pat<(i32 (any_lround FPR16:$rs1)), (FCVT_W_S (FCVT_S_H $rs1), 0b100)>; + +// [u]int->half. Match GCC and default to using dynamic rounding mode. +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S (FCVT_S_W $rs1, 0b111), 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S (FCVT_S_WU $rs1, 0b111), 0b111)>; + +// Saturating float->[u]int32. +def : Pat<(i32 (riscv_fcvt_x FPR16:$rs1, timm:$frm)), (FCVT_W_S (FCVT_S_H $rs1), timm:$frm)>; +def : Pat<(i32 (riscv_fcvt_xu FPR16:$rs1, timm:$frm)), (FCVT_WU_S (FCVT_S_H $rs1), timm:$frm)>; +} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] + +let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in { +def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>; + +// half->[u]int64. Round-to-zero must be used. +def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_S (FCVT_S_H $rs1), 0b001)>; + +// half->int64 with current rounding mode. +def : Pat<(i64 (any_lrint FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b111)>; +def : Pat<(i64 (any_llrint FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b111)>; + +// half->int64 rounded to nearest with ties rounded away from zero. +def : Pat<(i64 (any_lround FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b100)>; +def : Pat<(i64 (any_llround FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b100)>; + +// [u]int->fp. Match GCC and default to using dynamic rounding mode. 
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_S (FCVT_S_L $rs1, 0b111), 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_S (FCVT_S_LU $rs1, 0b111), 0b111)>; + +// Saturating float->[u]int64. +def : Pat<(i64 (riscv_fcvt_x FPR16:$rs1, timm:$frm)), (FCVT_L_S (FCVT_S_H $rs1), timm:$frm)>; +def : Pat<(i64 (riscv_fcvt_xu FPR16:$rs1, timm:$frm)), (FCVT_LU_S (FCVT_S_H $rs1), timm:$frm)>; + +def : Pat<(riscv_any_fcvt_w_rv64 FPR16:$rs1, timm:$frm), (FCVT_W_S (FCVT_S_H $rs1), timm:$frm)>; +def : Pat<(riscv_any_fcvt_wu_rv64 FPR16:$rs1, timm:$frm), (FCVT_WU_S (FCVT_S_H $rs1), timm:$frm)>; +} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -175,6 +175,7 @@ bool hasStdExtZvfh() const { return HasStdExtZvfh; } bool hasStdExtZfhmin() const { return HasStdExtZfhmin; } bool hasStdExtZfh() const { return HasStdExtZfh; } + bool hasStdExtZfhOrZfhmin() const { return HasStdExtZfh || HasStdExtZfhmin; } bool hasStdExtZfinx() const { return HasStdExtZfinx; } bool hasStdExtZdinx() const { return HasStdExtZdinx; } bool hasStdExtZhinxmin() const { return HasStdExtZhinxmin; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -309,7 +309,7 @@ return RISCVRegisterClass::GPRRC; Type *ScalarTy = Ty->getScalarType(); - if ((ScalarTy->isHalfTy() && ST->hasStdExtZfh()) || + if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhOrZfhmin()) || (ScalarTy->isFloatTy() && ST->hasStdExtF()) || (ScalarTy->isDoubleTy() && ST->hasStdExtD())) { return RISCVRegisterClass::FPRRC; diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll @@ -109,32 +109,16 @@ ; ; RV32-ILP32ZFHMIN-LABEL: callee_half_in_regs: ; RV32-ILP32ZFHMIN: # %bb.0: -; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 -; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32ZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-ILP32ZFHMIN-NEXT: mv s0, a0 -; RV32-ILP32ZFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ILP32ZFHMIN-NEXT: call __extendhfsf2@plt -; RV32-ILP32ZFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ILP32ZFHMIN-NEXT: add a0, s0, a0 -; RV32-ILP32ZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-ILP32ZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, 16 +; RV32-ILP32ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32-ILP32ZFHMIN-NEXT: fcvt.w.s a1, ft0, rtz +; RV32-ILP32ZFHMIN-NEXT: add a0, a0, a1 ; RV32-ILP32ZFHMIN-NEXT: ret ; ; RV64-LP64ZFHMIN-LABEL: callee_half_in_regs: ; RV64-LP64ZFHMIN: # %bb.0: -; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 -; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: mv s0, a0 -; RV64-LP64ZFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-LP64ZFHMIN-NEXT: call __extendhfsf2@plt -; RV64-LP64ZFHMIN-NEXT: fcvt.l.s a0, fa0, rtz -; RV64-LP64ZFHMIN-NEXT: addw a0, s0, a0 -; RV64-LP64ZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-LP64ZFHMIN-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64-LP64ZFHMIN-NEXT: addi sp, sp, 16 +; RV64-LP64ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64-LP64ZFHMIN-NEXT: fcvt.w.s a1, ft0, rtz 
+; RV64-LP64ZFHMIN-NEXT: addw a0, a0, a1 ; RV64-LP64ZFHMIN-NEXT: ret %b_fptosi = fptosi half %b to i32 %1 = add i32 %a, %b_fptosi @@ -217,7 +201,7 @@ ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 ; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-ILP32ZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) -; RV32-ILP32ZFHMIN-NEXT: flw fa0, %lo(.LCPI1_0)(a0) +; RV32-ILP32ZFHMIN-NEXT: flh fa0, %lo(.LCPI1_0)(a0) ; RV32-ILP32ZFHMIN-NEXT: li a0, 1 ; RV32-ILP32ZFHMIN-NEXT: call callee_half_in_regs@plt ; RV32-ILP32ZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -229,7 +213,7 @@ ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 ; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-LP64ZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) -; RV64-LP64ZFHMIN-NEXT: flw fa0, %lo(.LCPI1_0)(a0) +; RV64-LP64ZFHMIN-NEXT: flh fa0, %lo(.LCPI1_0)(a0) ; RV64-LP64ZFHMIN-NEXT: li a0, 1 ; RV64-LP64ZFHMIN-NEXT: call callee_half_in_regs@plt ; RV64-LP64ZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -334,32 +318,16 @@ ; ; RV32-ILP32ZFHMIN-LABEL: callee_half_on_stack: ; RV32-ILP32ZFHMIN: # %bb.0: -; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 -; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32ZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-ILP32ZFHMIN-NEXT: mv s0, a7 -; RV32-ILP32ZFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ILP32ZFHMIN-NEXT: call __extendhfsf2@plt -; RV32-ILP32ZFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ILP32ZFHMIN-NEXT: add a0, s0, a0 -; RV32-ILP32ZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-ILP32ZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, 16 +; RV32-ILP32ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32-ILP32ZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32-ILP32ZFHMIN-NEXT: add a0, a7, a0 ; RV32-ILP32ZFHMIN-NEXT: ret ; ; RV64-LP64ZFHMIN-LABEL: callee_half_on_stack: ; RV64-LP64ZFHMIN: # %bb.0: -; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 -; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: mv s0, a7 -; RV64-LP64ZFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-LP64ZFHMIN-NEXT: call __extendhfsf2@plt -; RV64-LP64ZFHMIN-NEXT: fcvt.l.s a0, fa0, rtz -; RV64-LP64ZFHMIN-NEXT: addw a0, s0, a0 -; RV64-LP64ZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-LP64ZFHMIN-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64-LP64ZFHMIN-NEXT: addi sp, sp, 16 +; RV64-LP64ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64-LP64ZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64-LP64ZFHMIN-NEXT: addw a0, a7, a0 ; RV64-LP64ZFHMIN-NEXT: ret %1 = fptosi half %i to i32 %2 = add i32 %h, %1 @@ -490,7 +458,7 @@ ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 ; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-ILP32ZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) -; RV32-ILP32ZFHMIN-NEXT: flw fa0, %lo(.LCPI3_0)(a0) +; RV32-ILP32ZFHMIN-NEXT: flh fa0, %lo(.LCPI3_0)(a0) ; RV32-ILP32ZFHMIN-NEXT: li a0, 1 ; RV32-ILP32ZFHMIN-NEXT: li a1, 2 ; RV32-ILP32ZFHMIN-NEXT: li a2, 3 @@ -509,7 +477,7 @@ ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 ; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-LP64ZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) -; RV64-LP64ZFHMIN-NEXT: flw fa0, %lo(.LCPI3_0)(a0) +; RV64-LP64ZFHMIN-NEXT: flh fa0, %lo(.LCPI3_0)(a0) ; RV64-LP64ZFHMIN-NEXT: li a0, 1 ; RV64-LP64ZFHMIN-NEXT: li a1, 2 ; RV64-LP64ZFHMIN-NEXT: li a2, 3 @@ -567,13 +535,13 @@ ; RV32-ILP32ZFHMIN-LABEL: callee_half_ret: ; RV32-ILP32ZFHMIN: # %bb.0: ; RV32-ILP32ZFHMIN-NEXT: lui a0, %hi(.LCPI4_0) -; RV32-ILP32ZFHMIN-NEXT: flw fa0, %lo(.LCPI4_0)(a0) +; 
RV32-ILP32ZFHMIN-NEXT: flh fa0, %lo(.LCPI4_0)(a0) ; RV32-ILP32ZFHMIN-NEXT: ret ; ; RV64-LP64ZFHMIN-LABEL: callee_half_ret: ; RV64-LP64ZFHMIN: # %bb.0: ; RV64-LP64ZFHMIN-NEXT: lui a0, %hi(.LCPI4_0) -; RV64-LP64ZFHMIN-NEXT: flw fa0, %lo(.LCPI4_0)(a0) +; RV64-LP64ZFHMIN-NEXT: flh fa0, %lo(.LCPI4_0)(a0) ; RV64-LP64ZFHMIN-NEXT: ret ret half 1.0 } @@ -658,9 +626,8 @@ ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 ; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-ILP32ZFHMIN-NEXT: call callee_half_ret@plt -; RV32-ILP32ZFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ILP32ZFHMIN-NEXT: call __extendhfsf2@plt -; RV32-ILP32ZFHMIN-NEXT: fcvt.w.s a0, fa0, rtz +; RV32-ILP32ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32-ILP32ZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz ; RV32-ILP32ZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, 16 ; RV32-ILP32ZFHMIN-NEXT: ret @@ -670,9 +637,8 @@ ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 ; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-LP64ZFHMIN-NEXT: call callee_half_ret@plt -; RV64-LP64ZFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-LP64ZFHMIN-NEXT: call __extendhfsf2@plt -; RV64-LP64ZFHMIN-NEXT: fcvt.l.s a0, fa0, rtz +; RV64-LP64ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64-LP64ZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz ; RV64-LP64ZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, 16 ; RV64-LP64ZFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -18,6 +18,15 @@ ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \ ; RUN: -mattr=+zfh -target-abi lp64d < %s \ ; RUN: | FileCheck %s -check-prefix=RV64IFDZFH +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f \ +; RUN: -mattr=+zfhmin -target-abi ilp32f < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IFZFHMIN +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \ +; RUN: -mattr=+zfhmin -target-abi ilp32d < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IFDZFHMIN +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \ +; RUN: -mattr=+zfhmin -target-abi lp64d < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IFDZFHMIN ; Test fcopysign scenarios where the sign argument is casted to the type of the ; magnitude argument. Those casts can be folded away by the DAGCombiner. 
@@ -89,6 +98,28 @@ ; RV64IFDZFH-NEXT: fcvt.d.s ft0, fa1 ; RV64IFDZFH-NEXT: fsgnj.d fa0, fa0, ft0 ; RV64IFDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fold_promote_d_s: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: fmv.x.w a2, fa0 +; RV32IFZFHMIN-NEXT: lui a3, 524288 +; RV32IFZFHMIN-NEXT: and a2, a2, a3 +; RV32IFZFHMIN-NEXT: slli a1, a1, 1 +; RV32IFZFHMIN-NEXT: srli a1, a1, 1 +; RV32IFZFHMIN-NEXT: or a1, a1, a2 +; RV32IFZFHMIN-NEXT: ret +; +; RV32IFDZFHMIN-LABEL: fold_promote_d_s: +; RV32IFDZFHMIN: # %bb.0: +; RV32IFDZFHMIN-NEXT: fcvt.d.s ft0, fa1 +; RV32IFDZFHMIN-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFDZFHMIN-NEXT: ret +; +; RV64IFDZFHMIN-LABEL: fold_promote_d_s: +; RV64IFDZFHMIN: # %bb.0: +; RV64IFDZFHMIN-NEXT: fcvt.d.s ft0, fa1 +; RV64IFDZFHMIN-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFDZFHMIN-NEXT: ret %c = fpext float %b to double %t = call double @llvm.copysign.f64(double %a, double %c) ret double %t @@ -178,6 +209,29 @@ ; RV64IFDZFH-NEXT: fcvt.d.h ft0, fa1 ; RV64IFDZFH-NEXT: fsgnj.d fa0, fa0, ft0 ; RV64IFDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fold_promote_d_h: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: fmv.x.h a2, fa0 +; RV32IFZFHMIN-NEXT: lui a3, 8 +; RV32IFZFHMIN-NEXT: and a2, a2, a3 +; RV32IFZFHMIN-NEXT: slli a2, a2, 16 +; RV32IFZFHMIN-NEXT: slli a1, a1, 1 +; RV32IFZFHMIN-NEXT: srli a1, a1, 1 +; RV32IFZFHMIN-NEXT: or a1, a1, a2 +; RV32IFZFHMIN-NEXT: ret +; +; RV32IFDZFHMIN-LABEL: fold_promote_d_h: +; RV32IFDZFHMIN: # %bb.0: +; RV32IFDZFHMIN-NEXT: fcvt.d.h ft0, fa1 +; RV32IFDZFHMIN-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFDZFHMIN-NEXT: ret +; +; RV64IFDZFHMIN-LABEL: fold_promote_d_h: +; RV64IFDZFHMIN: # %bb.0: +; RV64IFDZFHMIN-NEXT: fcvt.d.h ft0, fa1 +; RV64IFDZFHMIN-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFDZFHMIN-NEXT: ret %c = fpext half %b to double %t = call double @llvm.copysign.f64(double %a, double %c) ret double %t @@ -263,6 +317,24 @@ ; RV64IFDZFH-NEXT: fcvt.s.h ft0, fa1 ; RV64IFDZFH-NEXT: fsgnj.s fa0, fa0, ft0 ; RV64IFDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fold_promote_f_h: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IFZFHMIN-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFZFHMIN-NEXT: ret +; +; RV32IFDZFHMIN-LABEL: fold_promote_f_h: +; RV32IFDZFHMIN: # %bb.0: +; RV32IFDZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IFDZFHMIN-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFDZFHMIN-NEXT: ret +; +; RV64IFDZFHMIN-LABEL: fold_promote_f_h: +; RV64IFDZFHMIN: # %bb.0: +; RV64IFDZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IFDZFHMIN-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFDZFHMIN-NEXT: ret %c = fpext half %b to float %t = call float @llvm.copysign.f32(float %a, float %c) ret float %t @@ -323,6 +395,24 @@ ; RV64IFDZFH-NEXT: fcvt.s.d ft0, fa1 ; RV64IFDZFH-NEXT: fsgnj.s fa0, fa0, ft0 ; RV64IFDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fold_demote_s_d: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: fmv.w.x ft0, a1 +; RV32IFZFHMIN-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFZFHMIN-NEXT: ret +; +; RV32IFDZFHMIN-LABEL: fold_demote_s_d: +; RV32IFDZFHMIN: # %bb.0: +; RV32IFDZFHMIN-NEXT: fcvt.s.d ft0, fa1 +; RV32IFDZFHMIN-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFDZFHMIN-NEXT: ret +; +; RV64IFDZFHMIN-LABEL: fold_demote_s_d: +; RV64IFDZFHMIN: # %bb.0: +; RV64IFDZFHMIN-NEXT: fcvt.s.d ft0, fa1 +; RV64IFDZFHMIN-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFDZFHMIN-NEXT: ret %c = fptrunc double %b to float %t = call float @llvm.copysign.f32(float %a, float %c) ret float %t @@ -410,6 +500,53 @@ ; RV64IFDZFH-NEXT: fcvt.h.s ft0, fa1 ; RV64IFDZFH-NEXT: fsgnj.h fa0, fa0, ft0 ; RV64IFDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fold_demote_h_s: 
+; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: fsh fa0, 12(sp) +; RV32IFZFHMIN-NEXT: fmv.x.w a0, fa1 +; RV32IFZFHMIN-NEXT: lbu a1, 13(sp) +; RV32IFZFHMIN-NEXT: lui a2, 524288 +; RV32IFZFHMIN-NEXT: and a0, a0, a2 +; RV32IFZFHMIN-NEXT: srli a0, a0, 24 +; RV32IFZFHMIN-NEXT: andi a1, a1, 127 +; RV32IFZFHMIN-NEXT: or a0, a1, a0 +; RV32IFZFHMIN-NEXT: sb a0, 13(sp) +; RV32IFZFHMIN-NEXT: flh fa0, 12(sp) +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; RV32IFDZFHMIN-LABEL: fold_demote_h_s: +; RV32IFDZFHMIN: # %bb.0: +; RV32IFDZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFDZFHMIN-NEXT: fsh fa0, 12(sp) +; RV32IFDZFHMIN-NEXT: fmv.x.w a0, fa1 +; RV32IFDZFHMIN-NEXT: lbu a1, 13(sp) +; RV32IFDZFHMIN-NEXT: lui a2, 524288 +; RV32IFDZFHMIN-NEXT: and a0, a0, a2 +; RV32IFDZFHMIN-NEXT: srli a0, a0, 24 +; RV32IFDZFHMIN-NEXT: andi a1, a1, 127 +; RV32IFDZFHMIN-NEXT: or a0, a1, a0 +; RV32IFDZFHMIN-NEXT: sb a0, 13(sp) +; RV32IFDZFHMIN-NEXT: flh fa0, 12(sp) +; RV32IFDZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFDZFHMIN-NEXT: ret +; +; RV64IFDZFHMIN-LABEL: fold_demote_h_s: +; RV64IFDZFHMIN: # %bb.0: +; RV64IFDZFHMIN-NEXT: addi sp, sp, -16 +; RV64IFDZFHMIN-NEXT: fsw fa1, 8(sp) +; RV64IFDZFHMIN-NEXT: fsh fa0, 0(sp) +; RV64IFDZFHMIN-NEXT: lbu a0, 11(sp) +; RV64IFDZFHMIN-NEXT: lbu a1, 1(sp) +; RV64IFDZFHMIN-NEXT: andi a0, a0, 128 +; RV64IFDZFHMIN-NEXT: andi a1, a1, 127 +; RV64IFDZFHMIN-NEXT: or a0, a1, a0 +; RV64IFDZFHMIN-NEXT: sb a0, 1(sp) +; RV64IFDZFHMIN-NEXT: flh fa0, 0(sp) +; RV64IFDZFHMIN-NEXT: addi sp, sp, 16 +; RV64IFDZFHMIN-NEXT: ret %c = fptrunc float %b to half %t = call half @llvm.copysign.f16(half %a, half %c) ret half %t @@ -501,6 +638,54 @@ ; RV64IFDZFH-NEXT: fcvt.h.d ft0, fa1 ; RV64IFDZFH-NEXT: fsgnj.h fa0, fa0, ft0 ; RV64IFDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fold_demote_h_d: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: fsh fa0, 8(sp) +; RV32IFZFHMIN-NEXT: srli a1, a1, 16 +; RV32IFZFHMIN-NEXT: fmv.h.x ft0, a1 +; RV32IFZFHMIN-NEXT: fsh ft0, 12(sp) +; RV32IFZFHMIN-NEXT: lbu a0, 9(sp) +; RV32IFZFHMIN-NEXT: lbu a1, 13(sp) +; RV32IFZFHMIN-NEXT: andi a0, a0, 127 +; RV32IFZFHMIN-NEXT: andi a1, a1, 128 +; RV32IFZFHMIN-NEXT: or a0, a0, a1 +; RV32IFZFHMIN-NEXT: sb a0, 9(sp) +; RV32IFZFHMIN-NEXT: flh fa0, 8(sp) +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; RV32IFDZFHMIN-LABEL: fold_demote_h_d: +; RV32IFDZFHMIN: # %bb.0: +; RV32IFDZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFDZFHMIN-NEXT: fsd fa1, 8(sp) +; RV32IFDZFHMIN-NEXT: fsh fa0, 4(sp) +; RV32IFDZFHMIN-NEXT: lbu a0, 15(sp) +; RV32IFDZFHMIN-NEXT: lbu a1, 5(sp) +; RV32IFDZFHMIN-NEXT: andi a0, a0, 128 +; RV32IFDZFHMIN-NEXT: andi a1, a1, 127 +; RV32IFDZFHMIN-NEXT: or a0, a1, a0 +; RV32IFDZFHMIN-NEXT: sb a0, 5(sp) +; RV32IFDZFHMIN-NEXT: flh fa0, 4(sp) +; RV32IFDZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFDZFHMIN-NEXT: ret +; +; RV64IFDZFHMIN-LABEL: fold_demote_h_d: +; RV64IFDZFHMIN: # %bb.0: +; RV64IFDZFHMIN-NEXT: addi sp, sp, -16 +; RV64IFDZFHMIN-NEXT: fsh fa0, 8(sp) +; RV64IFDZFHMIN-NEXT: lbu a0, 9(sp) +; RV64IFDZFHMIN-NEXT: andi a0, a0, 127 +; RV64IFDZFHMIN-NEXT: fmv.x.d a1, fa1 +; RV64IFDZFHMIN-NEXT: srli a1, a1, 63 +; RV64IFDZFHMIN-NEXT: slli a1, a1, 63 +; RV64IFDZFHMIN-NEXT: srli a1, a1, 56 +; RV64IFDZFHMIN-NEXT: or a0, a0, a1 +; RV64IFDZFHMIN-NEXT: sb a0, 9(sp) +; RV64IFDZFHMIN-NEXT: flh fa0, 8(sp) +; RV64IFDZFHMIN-NEXT: addi sp, sp, 16 +; RV64IFDZFHMIN-NEXT: ret %c = fptrunc double %b to half %t = call half @llvm.copysign.f16(half %a, half %c) ret half %t 
diff --git a/llvm/test/CodeGen/RISCV/half-arith-strict.ll b/llvm/test/CodeGen/RISCV/half-arith-strict.ll --- a/llvm/test/CodeGen/RISCV/half-arith-strict.ll +++ b/llvm/test/CodeGen/RISCV/half-arith-strict.ll @@ -3,6 +3,12 @@ ; RUN: -disable-strictnode-mutation -target-abi ilp32f < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -disable-strictnode-mutation -target-abi lp64f < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -disable-strictnode-mutation -target-abi ilp32f < %s \ +; RUN: | FileCheck -check-prefix=CHECK-ZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -disable-strictnode-mutation -target-abi lp64f < %s \ +; RUN: | FileCheck -check-prefix=CHECK-ZFHMIN %s ; FIXME: We can't test without Zfh because soft promote legalization isn't ; implemented in SelectionDAG for STRICT nodes. @@ -12,6 +18,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fadd.h fa0, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fadd_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK-ZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.fadd.f16(half %a, half %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -22,6 +36,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fsub.h fa0, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fsub_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK-ZFHMIN-NEXT: fsub.s ft0, ft1, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.fsub.f16(half %a, half %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -32,6 +54,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fmul.h fa0, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fmul_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK-ZFHMIN-NEXT: fmul.s ft0, ft1, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.fmul.f16(half %a, half %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -42,6 +72,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fdiv.h fa0, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fdiv_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK-ZFHMIN-NEXT: fdiv.s ft0, ft1, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.fdiv.f16(half %a, half %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -52,6 +90,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fsqrt.h fa0, fa0 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fsqrt_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK-ZFHMIN-NEXT: fsqrt.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.sqrt.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -76,6 +121,15 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fmadd.h fa0, fa0, fa1, fa2 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fmadd_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, 
fa1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK-ZFHMIN-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.fma.f16(half %a, half %b, half %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -88,6 +142,22 @@ ; CHECK-NEXT: fadd.h ft0, fa2, ft0 ; CHECK-NEXT: fmsub.h fa0, fa0, fa1, ft0 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fmsub_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECK-ZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECK-ZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK-ZFHMIN-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %c_ = fadd half 0.0, %c ; avoid negation using xor %negc = fneg half %c_ %1 = call half @llvm.experimental.constrained.fma.f16(half %a, half %b, half %negc, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp @@ -102,6 +172,28 @@ ; CHECK-NEXT: fadd.h ft0, fa2, ft0 ; CHECK-NEXT: fnmadd.h fa0, ft1, fa1, ft0 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fnmadd_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK-ZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECK-ZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa2 +; CHECK-ZFHMIN-NEXT: fadd.s ft1, ft2, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECK-ZFHMIN-NEXT: fneg.s ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa1 +; CHECK-ZFHMIN-NEXT: fmadd.s ft0, ft0, ft2, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %a_ = fadd half 0.0, %a %c_ = fadd half 0.0, %c %nega = fneg half %a_ @@ -118,6 +210,28 @@ ; CHECK-NEXT: fadd.h ft0, fa2, ft0 ; CHECK-NEXT: fnmadd.h fa0, ft1, fa0, ft0 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fnmadd_h_2: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECK-ZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECK-ZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa2 +; CHECK-ZFHMIN-NEXT: fadd.s ft1, ft2, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECK-ZFHMIN-NEXT: fneg.s ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK-ZFHMIN-NEXT: fmadd.s ft0, ft2, ft0, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %b_ = fadd half 0.0, %b %c_ = fadd half 0.0, %c %negb = fneg half %b_ @@ -133,6 +247,22 @@ ; CHECK-NEXT: fadd.h ft0, fa0, ft0 ; CHECK-NEXT: fnmsub.h fa0, ft0, fa1, fa2 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fnmsub_h: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK-ZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECK-ZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; 
CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, fa2 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa1 +; CHECK-ZFHMIN-NEXT: fmadd.s ft0, ft0, ft2, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %a_ = fadd half 0.0, %a %nega = fneg half %a_ %1 = call half @llvm.experimental.constrained.fma.f16(half %nega, half %b, half %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp @@ -146,6 +276,22 @@ ; CHECK-NEXT: fadd.h ft0, fa1, ft0 ; CHECK-NEXT: fnmsub.h fa0, ft0, fa0, fa2 ; CHECK-NEXT: ret +; +; CHECK-ZFHMIN-LABEL: fnmsub_h_2: +; CHECK-ZFHMIN: # %bb.0: +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECK-ZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECK-ZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft1, fa2 +; CHECK-ZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK-ZFHMIN-NEXT: fmadd.s ft0, ft2, ft0, ft1 +; CHECK-ZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK-ZFHMIN-NEXT: ret %b_ = fadd half 0.0, %b %negb = fneg half %b_ %1 = call half @llvm.experimental.constrained.fma.f16(half %a, half %negb, half %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -7,6 +7,10 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN,CHECK-RV32-FSGNJ %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck --check-prefixes=CHECKIZFHMIN,CHECK-RV64-FSGNJ %s ; These tests are each targeted at a particular RISC-V FPU instruction. 
; Compares and conversions can be found in half-fcmp.ll and half-convert.ll @@ -70,6 +74,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fadd_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fadd half %a, %b ret half %1 } @@ -131,6 +143,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fsub_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fsub.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fsub half %a, %b ret half %1 } @@ -192,6 +212,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmul_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fmul.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fmul half %a, %b ret half %1 } @@ -253,6 +281,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fdiv_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fdiv.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fdiv half %a, %b ret half %1 } @@ -290,6 +326,13 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fsqrt_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fsqrt.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.sqrt.f16(half %a) ret half %1 } @@ -319,6 +362,64 @@ ; RV64I-NEXT: srli a0, a0, 49 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret +; +; CHECK-RV32-FSGNJ-LABEL: fsgnj_s: +; CHECK-RV32-FSGNJ: # %bb.0: +; CHECK-RV32-FSGNJ-NEXT: addi sp, sp, -16 +; CHECK-RV32-FSGNJ-NEXT: fsh fa1, 12(sp) +; CHECK-RV32-FSGNJ-NEXT: fsh fa0, 8(sp) +; CHECK-RV32-FSGNJ-NEXT: lbu a0, 13(sp) +; CHECK-RV32-FSGNJ-NEXT: lbu a1, 9(sp) +; CHECK-RV32-FSGNJ-NEXT: andi a0, a0, 128 +; CHECK-RV32-FSGNJ-NEXT: andi a1, a1, 127 +; CHECK-RV32-FSGNJ-NEXT: or a0, a1, a0 +; CHECK-RV32-FSGNJ-NEXT: sb a0, 9(sp) +; CHECK-RV32-FSGNJ-NEXT: flh fa0, 8(sp) +; CHECK-RV32-FSGNJ-NEXT: addi sp, sp, 16 +; CHECK-RV32-FSGNJ-NEXT: ret +; +; CHECK-RV64-FSGNJ-LABEL: fsgnj_s: +; CHECK-RV64-FSGNJ: # %bb.0: +; CHECK-RV64-FSGNJ-NEXT: addi sp, sp, -16 +; CHECK-RV64-FSGNJ-NEXT: fsh fa1, 8(sp) +; CHECK-RV64-FSGNJ-NEXT: fsh fa0, 0(sp) +; CHECK-RV64-FSGNJ-NEXT: lbu a0, 9(sp) +; CHECK-RV64-FSGNJ-NEXT: lbu a1, 1(sp) +; CHECK-RV64-FSGNJ-NEXT: andi a0, a0, 128 +; CHECK-RV64-FSGNJ-NEXT: andi a1, a1, 127 +; CHECK-RV64-FSGNJ-NEXT: or a0, a1, a0 +; CHECK-RV64-FSGNJ-NEXT: sb a0, 1(sp) +; CHECK-RV64-FSGNJ-NEXT: flh fa0, 0(sp) +; CHECK-RV64-FSGNJ-NEXT: addi sp, sp, 16 +; CHECK-RV64-FSGNJ-NEXT: ret +; CHECKFSGNJ-LABEL: fsgnj_s: +; CHECKFSGNJ: # %bb.0: +; CHECKFSGNJ-NEXT: addi sp, sp, -16 +; CHECKFSGNJ-NEXT: fsh fa1, 12(sp) +; CHECKFSGNJ-NEXT: fsh fa0, 8(sp) +; CHECKFSGNJ-NEXT: lbu a0, 13(sp) +; CHECKFSGNJ-NEXT: lbu a1, 9(sp) +; CHECKFSGNJ-NEXT: andi a0, a0, 128 +; 
CHECKFSGNJ-NEXT: andi a1, a1, 127 +; CHECKFSGNJ-NEXT: or a0, a1, a0 +; CHECKFSGNJ-NEXT: sb a0, 9(sp) +; CHECKFSGNJ-NEXT: flh fa0, 8(sp) +; CHECKFSGNJ-NEXT: addi sp, sp, 16 +; CHECKFSGNJ-NEXT: ret +; CHECK64FSGNJ-LABEL: fsgnj_s: +; CHECK64FSGNJ: # %bb.0: +; CHECK64FSGNJ-NEXT: addi sp, sp, -16 +; CHECK64FSGNJ-NEXT: fsh fa1, 8(sp) +; CHECK64FSGNJ-NEXT: fsh fa0, 0(sp) +; CHECK64FSGNJ-NEXT: lbu a0, 9(sp) +; CHECK64FSGNJ-NEXT: lbu a1, 1(sp) +; CHECK64FSGNJ-NEXT: andi a0, a0, 128 +; CHECK64FSGNJ-NEXT: andi a1, a1, 127 +; CHECK64FSGNJ-NEXT: or a0, a1, a0 +; CHECK64FSGNJ-NEXT: sb a0, 1(sp) +; CHECK64FSGNJ-NEXT: flh fa0, 0(sp) +; CHECK64FSGNJ-NEXT: addi sp, sp, 16 +; CHECK64FSGNJ-NEXT: ret %1 = call half @llvm.copysign.f16(half %a, half %b) ret half %1 } @@ -394,6 +495,18 @@ ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fneg_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fadd half %a, %a %2 = fneg half %1 %3 = fcmp oeq half %1, %2 @@ -487,6 +600,92 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECK-RV32-FSGNJ-LABEL: fsgnjn_s: +; CHECK-RV32-FSGNJ: # %bb.0: +; CHECK-RV32-FSGNJ-NEXT: addi sp, sp, -16 +; CHECK-RV32-FSGNJ-NEXT: fcvt.s.h ft0, fa1 +; CHECK-RV32-FSGNJ-NEXT: fcvt.s.h ft1, fa0 +; CHECK-RV32-FSGNJ-NEXT: fadd.s ft0, ft1, ft0 +; CHECK-RV32-FSGNJ-NEXT: fcvt.h.s ft0, ft0 +; CHECK-RV32-FSGNJ-NEXT: fcvt.s.h ft0, ft0 +; CHECK-RV32-FSGNJ-NEXT: fneg.s ft0, ft0 +; CHECK-RV32-FSGNJ-NEXT: fcvt.h.s ft0, ft0 +; CHECK-RV32-FSGNJ-NEXT: fsh fa0, 8(sp) +; CHECK-RV32-FSGNJ-NEXT: fsh ft0, 12(sp) +; CHECK-RV32-FSGNJ-NEXT: lbu a0, 9(sp) +; CHECK-RV32-FSGNJ-NEXT: lbu a1, 13(sp) +; CHECK-RV32-FSGNJ-NEXT: andi a0, a0, 127 +; CHECK-RV32-FSGNJ-NEXT: andi a1, a1, 128 +; CHECK-RV32-FSGNJ-NEXT: or a0, a0, a1 +; CHECK-RV32-FSGNJ-NEXT: sb a0, 9(sp) +; CHECK-RV32-FSGNJ-NEXT: flh fa0, 8(sp) +; CHECK-RV32-FSGNJ-NEXT: addi sp, sp, 16 +; CHECK-RV32-FSGNJ-NEXT: ret +; +; CHECK-RV64-FSGNJ-LABEL: fsgnjn_s: +; CHECK-RV64-FSGNJ: # %bb.0: +; CHECK-RV64-FSGNJ-NEXT: addi sp, sp, -16 +; CHECK-RV64-FSGNJ-NEXT: fcvt.s.h ft0, fa1 +; CHECK-RV64-FSGNJ-NEXT: fcvt.s.h ft1, fa0 +; CHECK-RV64-FSGNJ-NEXT: fadd.s ft0, ft1, ft0 +; CHECK-RV64-FSGNJ-NEXT: fcvt.h.s ft0, ft0 +; CHECK-RV64-FSGNJ-NEXT: fcvt.s.h ft0, ft0 +; CHECK-RV64-FSGNJ-NEXT: fneg.s ft0, ft0 +; CHECK-RV64-FSGNJ-NEXT: fcvt.h.s ft0, ft0 +; CHECK-RV64-FSGNJ-NEXT: fsh fa0, 0(sp) +; CHECK-RV64-FSGNJ-NEXT: fsh ft0, 8(sp) +; CHECK-RV64-FSGNJ-NEXT: lbu a0, 1(sp) +; CHECK-RV64-FSGNJ-NEXT: lbu a1, 9(sp) +; CHECK-RV64-FSGNJ-NEXT: andi a0, a0, 127 +; CHECK-RV64-FSGNJ-NEXT: andi a1, a1, 128 +; CHECK-RV64-FSGNJ-NEXT: or a0, a0, a1 +; CHECK-RV64-FSGNJ-NEXT: sb a0, 1(sp) +; CHECK-RV64-FSGNJ-NEXT: flh fa0, 0(sp) +; CHECK-RV64-FSGNJ-NEXT: addi sp, sp, 16 +; CHECK-RV64-FSGNJ-NEXT: ret +; CHECKFSGNJ-LABEL: fsgnjn_s: +; CHECKFSGNJ: # %bb.0: +; CHECKFSGNJ-NEXT: addi sp, sp, -16 +; CHECKFSGNJ-NEXT: fcvt.s.h ft0, fa1 +; CHECKFSGNJ-NEXT: fcvt.s.h ft1, fa0 +; CHECKFSGNJ-NEXT: fadd.s ft0, ft1, ft0 +; CHECKFSGNJ-NEXT: fcvt.h.s ft0, ft0 +; CHECKFSGNJ-NEXT: fcvt.s.h ft0, ft0 +; CHECKFSGNJ-NEXT: fneg.s ft0, ft0 +; CHECKFSGNJ-NEXT: fcvt.h.s ft0, ft0 +; 
CHECKFSGNJ-NEXT: fsh fa0, 8(sp) +; CHECKFSGNJ-NEXT: fsh ft0, 12(sp) +; CHECKFSGNJ-NEXT: lbu a0, 9(sp) +; CHECKFSGNJ-NEXT: lbu a1, 13(sp) +; CHECKFSGNJ-NEXT: andi a0, a0, 127 +; CHECKFSGNJ-NEXT: andi a1, a1, 128 +; CHECKFSGNJ-NEXT: or a0, a0, a1 +; CHECKFSGNJ-NEXT: sb a0, 9(sp) +; CHECKFSGNJ-NEXT: flh fa0, 8(sp) +; CHECKFSGNJ-NEXT: addi sp, sp, 16 +; CHECKFSGNJ-NEXT: ret +; CHECK64FSGNJ-LABEL: fsgnjn_s: +; CHECK64FSGNJ: # %bb.0: +; CHECK64FSGNJ-NEXT: addi sp, sp, -16 +; CHECK64FSGNJ-NEXT: fcvt.s.h ft0, fa1 +; CHECK64FSGNJ-NEXT: fcvt.s.h ft1, fa0 +; CHECK64FSGNJ-NEXT: fadd.s ft0, ft1, ft0 +; CHECK64FSGNJ-NEXT: fcvt.h.s ft0, ft0 +; CHECK64FSGNJ-NEXT: fcvt.s.h ft0, ft0 +; CHECK64FSGNJ-NEXT: fneg.s ft0, ft0 +; CHECK64FSGNJ-NEXT: fcvt.h.s ft0, ft0 +; CHECK64FSGNJ-NEXT: fsh fa0, 0(sp) +; CHECK64FSGNJ-NEXT: fsh ft0, 8(sp) +; CHECK64FSGNJ-NEXT: lbu a0, 1(sp) +; CHECK64FSGNJ-NEXT: lbu a1, 9(sp) +; CHECK64FSGNJ-NEXT: andi a0, a0, 127 +; CHECK64FSGNJ-NEXT: andi a1, a1, 128 +; CHECK64FSGNJ-NEXT: or a0, a0, a1 +; CHECK64FSGNJ-NEXT: sb a0, 1(sp) +; CHECK64FSGNJ-NEXT: flh fa0, 0(sp) +; CHECK64FSGNJ-NEXT: addi sp, sp, 16 +; CHECK64FSGNJ-NEXT: ret %1 = fadd half %a, %b %2 = fneg half %1 %3 = call half @llvm.copysign.f16(half %a, half %2) @@ -578,6 +777,20 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fabs_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fabs.s ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fadd half %a, %b %2 = call half @llvm.fabs.f16(half %1) %3 = fadd half %2, %1 @@ -643,6 +856,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmin_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.minnum.f16(half %a, half %b) ret half %1 } @@ -706,6 +927,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmax_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fmax.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.maxnum.f16(half %a, half %b) ret half %1 } @@ -783,6 +1012,15 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmadd_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECKIZFHMIN-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.fma.f16(half %a, half %b, half %c) ret half %1 } @@ -882,6 +1120,22 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmsub_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; 
CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECKIZFHMIN-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %c_ = fadd half 0.0, %c ; avoid negation using xor %negc = fsub half -0.0, %c_ %1 = call half @llvm.fma.f16(half %a, half %b, half %negc) @@ -1012,6 +1266,28 @@ ; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmadd_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa2 +; CHECKIZFHMIN-NEXT: fadd.s ft1, ft2, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fneg.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa1 +; CHECKIZFHMIN-NEXT: fmadd.s ft0, ft0, ft2, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a_ = fadd half 0.0, %a %c_ = fadd half 0.0, %c %nega = fsub half -0.0, %a_ @@ -1144,6 +1420,28 @@ ; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmadd_s_2: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa2 +; CHECKIZFHMIN-NEXT: fadd.s ft1, ft2, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fneg.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECKIZFHMIN-NEXT: fmadd.s ft0, ft2, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %b_ = fadd half 0.0, %b %c_ = fadd half 0.0, %c %negb = fsub half -0.0, %b_ @@ -1240,6 +1538,18 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmadd_s_3: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECKIZFHMIN-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.fma.f16(half %a, half %b, half %c) %neg = fneg half %1 ret half %neg @@ -1331,6 +1641,18 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmadd_nsz: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; 
CHECKIZFHMIN-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call nsz half @llvm.fma.f16(half %a, half %b, half %c) %neg = fneg nsz half %1 ret half %neg @@ -1429,6 +1751,22 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmsub_s: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa1 +; CHECKIZFHMIN-NEXT: fmadd.s ft0, ft0, ft2, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a_ = fadd half 0.0, %a %nega = fsub half -0.0, %a_ %1 = call half @llvm.fma.f16(half %nega, half %b, half %c) @@ -1530,6 +1868,22 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmsub_s_2: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECKIZFHMIN-NEXT: fmadd.s ft0, ft2, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %b_ = fadd half 0.0, %b %negb = fsub half -0.0, %b_ %1 = call half @llvm.fma.f16(half %a, half %negb, half %c) @@ -1617,6 +1971,18 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmadd_s_contract: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fmul.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa2 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fmul contract half %a, %b %2 = fadd contract half %1, %c ret half %2 @@ -1717,6 +2083,22 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmsub_s_contract: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECKIZFHMIN-NEXT: fmul.s ft1, ft2, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fsub.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %c_ = fadd half 0.0, %c ; avoid negation using xor %1 = fmul contract half %a, %b %2 = fsub contract half %1, %c_ @@ -1854,6 +2236,31 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmadd_s_contract: 
+; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa1 +; CHECKIZFHMIN-NEXT: fadd.s ft2, ft2, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft2, ft2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft3, fa2 +; CHECKIZFHMIN-NEXT: fadd.s ft1, ft3, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, ft2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fmul.s ft0, ft0, ft2 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fneg.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fsub.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a_ = fadd half 0.0, %a ; avoid negation using xor %b_ = fadd half 0.0, %b ; avoid negation using xor %c_ = fadd half 0.0, %c ; avoid negation using xor @@ -1973,6 +2380,25 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fnmsub_s_contract: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fmv.w.x ft1, zero +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa1 +; CHECKIZFHMIN-NEXT: fadd.s ft1, ft2, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fmul.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa2 +; CHECKIZFHMIN-NEXT: fsub.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a_ = fadd half 0.0, %a ; avoid negation using xor %b_ = fadd half 0.0, %b ; avoid negation using xor %1 = fmul contract half %a_, %b_ diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll --- a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll @@ -7,6 +7,12 @@ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV32IZFHMIN %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV64IZFHMIN %s ; This file tests cases where simple floating point operations can be ; profitably handled though bit manipulation if a soft-float ABI is being used @@ -38,6 +44,18 @@ ; RV64IZFH-NEXT: lui a1, 1048568 ; RV64IZFH-NEXT: xor a0, a0, a1 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: fneg: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a1, 1048568 +; RV32IZFHMIN-NEXT: xor a0, a0, a1 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: fneg: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a1, 1048568 +; RV64IZFHMIN-NEXT: xor a0, a0, a1 +; RV64IZFHMIN-NEXT: ret %1 = fneg half %a ret half %1 } @@ -68,6 +86,18 @@ ; RV64IZFH-NEXT: slli a0, a0, 49 ; RV64IZFH-NEXT: srli a0, a0, 49 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: fabs: +; RV32IZFHMIN: # %bb.0: +; 
RV32IZFHMIN-NEXT: slli a0, a0, 17 +; RV32IZFHMIN-NEXT: srli a0, a0, 17 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: fabs: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: slli a0, a0, 49 +; RV64IZFHMIN-NEXT: srli a0, a0, 49 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.fabs.f16(half %a) ret half %1 } @@ -114,6 +144,48 @@ ; RV64IZFH-NEXT: fsgnjn.h ft0, ft1, ft0 ; RV64IZFH-NEXT: fmv.x.h a0, ft0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: fcopysign_fneg: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: fmv.h.x ft0, a0 +; RV32IZFHMIN-NEXT: fmv.h.x ft1, a1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; RV32IZFHMIN-NEXT: fneg.s ft1, ft1 +; RV32IZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; RV32IZFHMIN-NEXT: fsh ft0, 8(sp) +; RV32IZFHMIN-NEXT: fsh ft1, 12(sp) +; RV32IZFHMIN-NEXT: lbu a0, 9(sp) +; RV32IZFHMIN-NEXT: lbu a1, 13(sp) +; RV32IZFHMIN-NEXT: andi a0, a0, 127 +; RV32IZFHMIN-NEXT: andi a1, a1, 128 +; RV32IZFHMIN-NEXT: or a0, a0, a1 +; RV32IZFHMIN-NEXT: sb a0, 9(sp) +; RV32IZFHMIN-NEXT: flh ft0, 8(sp) +; RV32IZFHMIN-NEXT: fmv.x.h a0, ft0 +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: fcopysign_fneg: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: fmv.h.x ft0, a0 +; RV64IZFHMIN-NEXT: fmv.h.x ft1, a1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; RV64IZFHMIN-NEXT: fneg.s ft1, ft1 +; RV64IZFHMIN-NEXT: fcvt.h.s ft1, ft1 +; RV64IZFHMIN-NEXT: fsh ft0, 0(sp) +; RV64IZFHMIN-NEXT: fsh ft1, 8(sp) +; RV64IZFHMIN-NEXT: lbu a0, 1(sp) +; RV64IZFHMIN-NEXT: lbu a1, 9(sp) +; RV64IZFHMIN-NEXT: andi a0, a0, 127 +; RV64IZFHMIN-NEXT: andi a1, a1, 128 +; RV64IZFHMIN-NEXT: or a0, a0, a1 +; RV64IZFHMIN-NEXT: sb a0, 1(sp) +; RV64IZFHMIN-NEXT: flh ft0, 0(sp) +; RV64IZFHMIN-NEXT: fmv.x.h a0, ft0 +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = fneg half %b %2 = call half @llvm.copysign.f16(half %a, half %1) ret half %2 diff --git a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll --- a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll @@ -3,6 +3,10 @@ ; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFHMIN %s declare void @abort() declare void @exit(i32) @@ -30,6 +34,28 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_false: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: li a0, 1 +; RV32IZFHMIN-NEXT: bnez a0, .LBB0_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.then +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB0_2: # %if.else +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_false: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: li a0, 1 +; RV64IZFHMIN-NEXT: bnez a0, .LBB0_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.then +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB0_2: # %if.else +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp 
false half %a, %b br i1 %1, label %if.then, label %if.else if.then: @@ -61,6 +87,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_oeq: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: feq.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB1_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB1_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_oeq: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: feq.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB1_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB1_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp oeq half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -95,6 +147,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_oeq_alt: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: feq.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB2_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB2_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_oeq_alt: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: feq.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB2_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB2_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp oeq half %a, %b br i1 %1, label %if.then, label %if.else if.then: @@ -126,6 +204,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_ogt: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV32IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB3_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB3_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_ogt: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV64IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB3_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB3_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp ogt half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -157,6 +261,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte 
Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_oge: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV32IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB4_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB4_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_oge: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV64IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB4_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB4_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp oge half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -188,6 +318,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_olt: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB5_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB5_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_olt: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB5_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB5_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp olt half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -219,6 +375,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_ole: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB6_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB6_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_ole: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB6_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB6_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp ole half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -254,6 +436,36 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_one: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: 
fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: flt.s a1, ft0, ft1 +; RV32IZFHMIN-NEXT: or a0, a1, a0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB7_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB7_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_one: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: flt.s a1, ft0, ft1 +; RV64IZFHMIN-NEXT: or a0, a1, a0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB7_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB7_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp one half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -289,6 +501,36 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_ord: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: feq.s a0, ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: bnez a0, .LBB8_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB8_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_ord: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: feq.s a0, ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: bnez a0, .LBB8_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB8_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp ord half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -324,6 +566,36 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_ueq: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: flt.s a1, ft0, ft1 +; RV32IZFHMIN-NEXT: or a0, a1, a0 +; RV32IZFHMIN-NEXT: beqz a0, .LBB9_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB9_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_ueq: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: flt.s a1, ft0, ft1 +; RV64IZFHMIN-NEXT: or a0, a1, a0 +; RV64IZFHMIN-NEXT: beqz a0, .LBB9_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB9_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp ueq half %a, %b br i1 %1, label %if.then, 
label %if.else if.else: @@ -355,6 +627,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_ugt: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: beqz a0, .LBB10_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB10_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_ugt: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: beqz a0, .LBB10_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB10_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp ugt half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -386,6 +684,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_uge: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: beqz a0, .LBB11_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB11_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_uge: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: beqz a0, .LBB11_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB11_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp uge half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -417,6 +741,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_ult: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV32IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: beqz a0, .LBB12_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB12_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_ult: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV64IZFHMIN-NEXT: fle.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: beqz a0, .LBB12_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB12_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp ult half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -448,6 +798,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call 
abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_ule: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV32IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: beqz a0, .LBB13_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB13_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_ule: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; RV64IZFHMIN-NEXT: flt.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: beqz a0, .LBB13_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB13_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp ule half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -479,6 +855,32 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_une: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: feq.s a0, ft1, ft0 +; RV32IZFHMIN-NEXT: beqz a0, .LBB14_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB14_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_une: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: feq.s a0, ft1, ft0 +; RV64IZFHMIN-NEXT: beqz a0, .LBB14_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB14_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp une half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -514,6 +916,36 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_uno: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: feq.s a0, ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: beqz a0, .LBB15_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB15_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_uno: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: feq.s a0, ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: beqz a0, .LBB15_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB15_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp uno half %a, %b br i1 %1, label %if.then, label %if.else if.else: @@ -545,6 +977,28 @@ ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: call 
abort@plt +; +; RV32IZFHMIN-LABEL: br_fcmp_true: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: li a0, 1 +; RV32IZFHMIN-NEXT: bnez a0, .LBB16_2 +; RV32IZFHMIN-NEXT: # %bb.1: # %if.else +; RV32IZFHMIN-NEXT: ret +; RV32IZFHMIN-NEXT: .LBB16_2: # %if.then +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: call abort@plt +; +; RV64IZFHMIN-LABEL: br_fcmp_true: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: li a0, 1 +; RV64IZFHMIN-NEXT: bnez a0, .LBB16_2 +; RV64IZFHMIN-NEXT: # %bb.1: # %if.else +; RV64IZFHMIN-NEXT: ret +; RV64IZFHMIN-NEXT: .LBB16_2: # %if.then +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: call abort@plt %1 = fcmp true half %a, %b br i1 %1, label %if.then, label %if.else if.else: diff --git a/llvm/test/CodeGen/RISCV/half-convert-strict.ll b/llvm/test/CodeGen/RISCV/half-convert-strict.ll --- a/llvm/test/CodeGen/RISCV/half-convert-strict.ll +++ b/llvm/test/CodeGen/RISCV/half-convert-strict.ll @@ -11,6 +11,18 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64d -disable-strictnode-mutation < %s \ ; RUN: | FileCheck -check-prefix=RV64IDZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefixes=CHECK32-IZFHMIN,RV32IFZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefixes=CHECK64-IZFHMIN,RV64IFZFHMIN %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32d -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefixes=CHECK32-IZFHMIN,RV32IDZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64d -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefixes=CHECK64-IZFHMIN,RV64IDZFHMIN %s ; NOTE: The rounding mode metadata does not effect which instruction is ; selected. 
Dynamic rounding mode is always used for operations that @@ -36,6 +48,18 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_si_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_si_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %a, metadata !"fpexcept.strict") strictfp ret i16 %1 } @@ -61,6 +85,18 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_ui_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_ui_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %a, metadata !"fpexcept.strict") strictfp ret i16 %1 } @@ -81,6 +117,18 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp ret i32 %1 } @@ -101,6 +149,18 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp ret i32 %1 } @@ -136,6 +196,26 @@ ; RV64IDZFH-NEXT: li a0, 1 ; RV64IDZFH-NEXT: .LBB4_2: ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_h_multiple_use: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: bnez a0, .LBB4_2 +; CHECK32-IZFHMIN-NEXT: # %bb.1: +; CHECK32-IZFHMIN-NEXT: li a0, 1 +; CHECK32-IZFHMIN-NEXT: .LBB4_2: +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_h_multiple_use: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: bnez a0, .LBB4_2 +; CHECK64-IZFHMIN-NEXT: # %bb.1: +; CHECK64-IZFHMIN-NEXT: li a0, 1 +; CHECK64-IZFHMIN-NEXT: .LBB4_2: +; CHECK64-IZFHMIN-NEXT: ret %a = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") strictfp %b = icmp eq i32 %a, 0 %c = select i1 %b, i32 1, i32 %a @@ -170,6 +250,21 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: 
fcvt_l_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __fixhfdi@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_l_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict") strictfp ret i64 %1 } @@ -203,6 +298,21 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_lu_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __fixunshfdi@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_lu_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict") strictfp ret i64 %1 } @@ -236,6 +346,22 @@ ; RV64IDZFH-NEXT: srai a0, a0, 48 ; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_si: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: slli a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: srai a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_si: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: srai a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -256,6 +382,18 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_si_signext: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_si_signext: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -288,6 +426,22 @@ ; RV64IDZFH-NEXT: srli a0, a0, 48 ; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_ui: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: slli a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: srli a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_ui: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: srli a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", 
metadata !"fpexcept.strict") strictfp ret half %1 } @@ -308,6 +462,18 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_ui_zeroext: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_ui_zeroext: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -327,6 +493,19 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_w: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_w: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: sext.w a0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -350,6 +529,20 @@ ; RV64IDZFH-NEXT: lw a0, 0(a0) ; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_w_load: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: lw a0, 0(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_w_load: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: lw a0, 0(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %a = load i32, i32* %p %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 @@ -370,6 +563,20 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_wu: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_wu: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -399,6 +606,20 @@ ; RV64IDZFH-NEXT: lwu a0, 0(a0) ; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_wu_load: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: lw a0, 0(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_wu_load: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: lwu a0, 0(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %a = load i32, i32* %p %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 @@ -432,6 +653,21 @@ ; RV64IDZFH: # %bb.0: ; 
RV64IDZFH-NEXT: fcvt.h.l fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_l: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __floatdihf@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_l: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -465,6 +701,21 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.h.lu fa0, a0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_lu: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __floatundihf@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_lu: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret half %1 } @@ -485,6 +736,16 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_s: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_s: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") ret half %1 } @@ -505,6 +766,16 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_s_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_s_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; CHECK64-IZFHMIN-NEXT: ret %1 = call float @llvm.experimental.constrained.fpext.f32.f16(half %a, metadata !"fpexcept.strict") ret float %1 } @@ -538,6 +809,34 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.h.d fa0, fa0 ; RV64IDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fcvt_h_d: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: call __truncdfhf2@plt +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; RV64IFZFHMIN-LABEL: fcvt_h_d: +; RV64IFZFHMIN: # %bb.0: +; RV64IFZFHMIN-NEXT: addi sp, sp, -16 +; RV64IFZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFZFHMIN-NEXT: call __truncdfhf2@plt +; RV64IFZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFZFHMIN-NEXT: addi sp, sp, 16 +; RV64IFZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: fcvt_h_d: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fcvt.h.d fa0, fa0 +; RV32IDZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: fcvt_h_d: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fcvt.h.d fa0, fa0 +; RV64IDZFHMIN-NEXT: ret %1 = call 
half @llvm.experimental.constrained.fptrunc.f16.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") ret half %1 } @@ -573,6 +872,36 @@ ; RV64IDZFH: # %bb.0: ; RV64IDZFH-NEXT: fcvt.d.h fa0, fa0 ; RV64IDZFH-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fcvt_d_h: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IFZFHMIN-NEXT: call __extendsfdf2@plt +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; RV64IFZFHMIN-LABEL: fcvt_d_h: +; RV64IFZFHMIN: # %bb.0: +; RV64IFZFHMIN-NEXT: addi sp, sp, -16 +; RV64IFZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IFZFHMIN-NEXT: call __extendsfdf2@plt +; RV64IFZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFZFHMIN-NEXT: addi sp, sp, 16 +; RV64IFZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: fcvt_d_h: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fcvt.d.h fa0, fa0 +; RV32IDZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: fcvt_d_h: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fcvt.d.h fa0, fa0 +; RV64IDZFHMIN-NEXT: ret %1 = call double @llvm.experimental.constrained.fpext.f64.f16(half %a, metadata !"fpexcept.strict") ret double %1 } @@ -607,6 +936,22 @@ ; RV64IDZFH-NEXT: fcvt.h.w ft0, a0 ; RV64IDZFH-NEXT: fsh ft0, 0(a1) ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_w_demanded_bits: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi a0, a0, 1 +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK32-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_w_demanded_bits: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: addiw a0, a0, 1 +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK64-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK64-IZFHMIN-NEXT: ret %3 = add i32 %0, 1 %4 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp store half %4, half* %1, align 2 @@ -642,6 +987,24 @@ ; RV64IDZFH-NEXT: fcvt.h.wu ft0, a0 ; RV64IDZFH-NEXT: fsh ft0, 0(a1) ; RV64IDZFH-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_wu_demanded_bits: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi a0, a0, 1 +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK32-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_wu_demanded_bits: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: addiw a0, a0, 1 +; CHECK64-IZFHMIN-NEXT: slli a2, a0, 32 +; CHECK64-IZFHMIN-NEXT: srli a2, a2, 32 +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a2 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK64-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK64-IZFHMIN-NEXT: ret %3 = add i32 %0, 1 %4 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp store half %4, half* %1, align 2 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -11,6 +11,14 @@ ; RUN: < %s | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs \ ; RUN: < %s | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: 
-target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK32-IZFHMIN,RV32IFZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK64-IZFHMIN,RV64IFZFHMIN %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32d < %s | FileCheck -check-prefixes=CHECK32-IZFHMIN,RV32IDZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64d < %s | FileCheck -check-prefixes=CHECK64-IZFHMIN,RV64IDZFHMIN %s define i16 @fcvt_si_h(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_si_h: @@ -56,6 +64,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_si_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_si_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptosi half %a to i16 ret i16 %1 } @@ -210,6 +230,38 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_si_h_sat: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK32-IZFHMIN-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_1) +; CHECK32-IZFHMIN-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK32-IZFHMIN-NEXT: fmax.s ft0, ft2, ft0 +; CHECK32-IZFHMIN-NEXT: fmin.s ft0, ft0, ft1 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: feq.s a1, ft2, ft2 +; CHECK32-IZFHMIN-NEXT: seqz a1, a1 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK32-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_si_h_sat: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK64-IZFHMIN-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_1) +; CHECK64-IZFHMIN-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK64-IZFHMIN-NEXT: fmax.s ft0, ft2, ft0 +; CHECK64-IZFHMIN-NEXT: fmin.s ft0, ft0, ft1 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft2, ft2 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a) ret i16 %0 @@ -260,6 +312,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_ui_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_ui_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptoui half %a to i16 ret i16 %1 } @@ -384,6 +448,28 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_ui_h_sat: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK32-IZFHMIN-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; 
CHECK32-IZFHMIN-NEXT: fmv.w.x ft2, zero +; CHECK32-IZFHMIN-NEXT: fmax.s ft1, ft1, ft2 +; CHECK32-IZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_ui_h_sat: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK64-IZFHMIN-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK64-IZFHMIN-NEXT: fmv.w.x ft2, zero +; CHECK64-IZFHMIN-NEXT: fmax.s ft1, ft1, ft2 +; CHECK64-IZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i16 @llvm.fptoui.sat.i16.f16(half %a) ret i16 %0 @@ -429,6 +515,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptosi half %a to i32 ret i32 %1 } @@ -550,6 +648,26 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_h_sat: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK32-IZFHMIN-NEXT: seqz a1, a1 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK32-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_h_sat: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %0 @@ -595,6 +713,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptoui half %a to i32 ret i32 %1 } @@ -660,6 +790,26 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_h_multiple_use: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: bnez a0, .LBB7_2 +; CHECK32-IZFHMIN-NEXT: # %bb.1: +; CHECK32-IZFHMIN-NEXT: li a0, 1 +; CHECK32-IZFHMIN-NEXT: .LBB7_2: +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_h_multiple_use: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: bnez a0, .LBB7_2 +; CHECK64-IZFHMIN-NEXT: # %bb.1: +; CHECK64-IZFHMIN-NEXT: li a0, 1 +; CHECK64-IZFHMIN-NEXT: .LBB7_2: +; CHECK64-IZFHMIN-NEXT: ret %a = fptoui half %x to i32 %b = icmp eq i32 
%a, 0 %c = select i1 %b, i32 1, i32 %a @@ -776,6 +926,28 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_h_sat: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK32-IZFHMIN-NEXT: seqz a1, a1 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK32-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_h_sat: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i32 @llvm.fptoui.sat.i32.f16(half %a) ret i32 %0 @@ -834,6 +1006,21 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_l_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __fixhfdi@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_l_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptosi half %a to i64 ret i64 %1 } @@ -1047,6 +1234,94 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fcvt_l_h_sat: +; RV32IFZFHMIN: # %bb.0: # %start +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IFZFHMIN-NEXT: flw ft0, %lo(.LCPI10_0)(a0) +; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IFZFHMIN-NEXT: fle.s s0, ft0, fs0 +; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IFZFHMIN-NEXT: call __fixsfdi@plt +; RV32IFZFHMIN-NEXT: lui a3, 524288 +; RV32IFZFHMIN-NEXT: bnez s0, .LBB10_2 +; RV32IFZFHMIN-NEXT: # %bb.1: # %start +; RV32IFZFHMIN-NEXT: lui a1, 524288 +; RV32IFZFHMIN-NEXT: .LBB10_2: # %start +; RV32IFZFHMIN-NEXT: lui a2, %hi(.LCPI10_1) +; RV32IFZFHMIN-NEXT: flw ft0, %lo(.LCPI10_1)(a2) +; RV32IFZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IFZFHMIN-NEXT: beqz a2, .LBB10_4 +; RV32IFZFHMIN-NEXT: # %bb.3: +; RV32IFZFHMIN-NEXT: addi a1, a3, -1 +; RV32IFZFHMIN-NEXT: .LBB10_4: # %start +; RV32IFZFHMIN-NEXT: feq.s a3, fs0, fs0 +; RV32IFZFHMIN-NEXT: seqz a3, a3 +; RV32IFZFHMIN-NEXT: addi a3, a3, -1 +; RV32IFZFHMIN-NEXT: and a1, a3, a1 +; RV32IFZFHMIN-NEXT: neg a2, a2 +; RV32IFZFHMIN-NEXT: neg a4, s0 +; RV32IFZFHMIN-NEXT: and a0, a4, a0 +; RV32IFZFHMIN-NEXT: or a0, a2, a0 +; RV32IFZFHMIN-NEXT: and a0, a3, a0 +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_l_h_sat: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: 
fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK64-IZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: fcvt_l_h_sat: +; RV32IDZFHMIN: # %bb.0: # %start +; RV32IDZFHMIN-NEXT: addi sp, sp, -16 +; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IDZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IDZFHMIN-NEXT: flw ft0, %lo(.LCPI10_0)(a0) +; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFHMIN-NEXT: fle.s s0, ft0, fs0 +; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IDZFHMIN-NEXT: call __fixsfdi@plt +; RV32IDZFHMIN-NEXT: lui a3, 524288 +; RV32IDZFHMIN-NEXT: bnez s0, .LBB10_2 +; RV32IDZFHMIN-NEXT: # %bb.1: # %start +; RV32IDZFHMIN-NEXT: lui a1, 524288 +; RV32IDZFHMIN-NEXT: .LBB10_2: # %start +; RV32IDZFHMIN-NEXT: lui a2, %hi(.LCPI10_1) +; RV32IDZFHMIN-NEXT: flw ft0, %lo(.LCPI10_1)(a2) +; RV32IDZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IDZFHMIN-NEXT: beqz a2, .LBB10_4 +; RV32IDZFHMIN-NEXT: # %bb.3: +; RV32IDZFHMIN-NEXT: addi a1, a3, -1 +; RV32IDZFHMIN-NEXT: .LBB10_4: # %start +; RV32IDZFHMIN-NEXT: feq.s a3, fs0, fs0 +; RV32IDZFHMIN-NEXT: seqz a3, a3 +; RV32IDZFHMIN-NEXT: addi a3, a3, -1 +; RV32IDZFHMIN-NEXT: and a1, a3, a1 +; RV32IDZFHMIN-NEXT: neg a2, a2 +; RV32IDZFHMIN-NEXT: neg a4, s0 +; RV32IDZFHMIN-NEXT: and a0, a4, a0 +; RV32IDZFHMIN-NEXT: or a0, a2, a0 +; RV32IDZFHMIN-NEXT: and a0, a3, a0 +; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IDZFHMIN-NEXT: addi sp, sp, 16 +; RV32IDZFHMIN-NEXT: ret start: %0 = tail call i64 @llvm.fptosi.sat.i64.f16(half %a) ret i64 %0 @@ -1105,6 +1380,21 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_lu_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __fixunshfdi@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_lu_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptoui half %a to i64 ret i64 %1 } @@ -1260,6 +1550,41 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_lu_h_sat: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK32-IZFHMIN-NEXT: flw ft0, %lo(.LCPI12_0)(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; CHECK32-IZFHMIN-NEXT: flt.s a0, ft0, fa0 +; CHECK32-IZFHMIN-NEXT: neg s0, a0 +; CHECK32-IZFHMIN-NEXT: fmv.w.x ft0, zero +; CHECK32-IZFHMIN-NEXT: fle.s a0, ft0, fa0 +; CHECK32-IZFHMIN-NEXT: neg s1, a0 +; CHECK32-IZFHMIN-NEXT: call __fixunssfdi@plt +; CHECK32-IZFHMIN-NEXT: and a0, s1, a0 +; CHECK32-IZFHMIN-NEXT: or a0, s0, a0 +; CHECK32-IZFHMIN-NEXT: and a1, s1, 
a1 +; CHECK32-IZFHMIN-NEXT: or a1, s0, a1 +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_lu_h_sat: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i64 @llvm.fptoui.sat.i64.f16(half %a) ret i64 %0 @@ -1318,6 +1643,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_si: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: slli a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: srai a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_si: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: srai a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = sitofp i16 %a to half ret half %1 } @@ -1357,6 +1698,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_si_signext: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_si_signext: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = sitofp i16 %a to half ret half %1 } @@ -1413,6 +1766,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_ui: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: slli a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: srli a0, a0, 16 +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_ui: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: srli a0, a0, 48 +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = uitofp i16 %a to half ret half %1 } @@ -1452,6 +1821,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_ui_zeroext: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_ui_zeroext: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = uitofp i16 %a to half ret half %1 } @@ -1492,6 +1873,19 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_w: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_w: +; 
CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: sext.w a0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = sitofp i32 %a to half ret half %1 } @@ -1536,6 +1930,20 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_w_load: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: lw a0, 0(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_w_load: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: lw a0, 0(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %a = load i32, i32* %p %1 = sitofp i32 %a to half ret half %1 @@ -1577,6 +1985,20 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_wu: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_wu: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = uitofp i32 %a to half ret half %1 } @@ -1627,6 +2049,20 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_wu_load: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: lw a0, 0(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_wu_load: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: lwu a0, 0(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %a = load i32, i32* %p %1 = uitofp i32 %a to half ret half %1 @@ -1680,6 +2116,21 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_l: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __floatdihf@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_l: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = sitofp i64 %a to half ret half %1 } @@ -1732,6 +2183,21 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_lu: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi sp, sp, -16 +; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: call __floatundihf@plt +; CHECK32-IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK32-IZFHMIN-NEXT: addi sp, sp, 16 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_lu: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECK64-IZFHMIN-NEXT: ret %1 = uitofp i64 %a to half ret half %1 } @@ -1769,6 +2235,16 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte 
Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_s: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_s: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; CHECK64-IZFHMIN-NEXT: ret %1 = fptrunc float %a to half ret half %1 } @@ -1810,6 +2286,16 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_s_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_s_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; CHECK64-IZFHMIN-NEXT: ret %1 = fpext half %a to float ret float %1 } @@ -1860,6 +2346,34 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fcvt_h_d: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: call __truncdfhf2@plt +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; RV64IFZFHMIN-LABEL: fcvt_h_d: +; RV64IFZFHMIN: # %bb.0: +; RV64IFZFHMIN-NEXT: addi sp, sp, -16 +; RV64IFZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFZFHMIN-NEXT: call __truncdfhf2@plt +; RV64IFZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFZFHMIN-NEXT: addi sp, sp, 16 +; RV64IFZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: fcvt_h_d: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fcvt.h.d fa0, fa0 +; RV32IDZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: fcvt_h_d: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fcvt.h.d fa0, fa0 +; RV64IDZFHMIN-NEXT: ret %1 = fptrunc double %a to half ret half %1 } @@ -1920,6 +2434,36 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IFZFHMIN-LABEL: fcvt_d_h: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IFZFHMIN-NEXT: call __extendsfdf2@plt +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; RV64IFZFHMIN-LABEL: fcvt_d_h: +; RV64IFZFHMIN: # %bb.0: +; RV64IFZFHMIN-NEXT: addi sp, sp, -16 +; RV64IFZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IFZFHMIN-NEXT: call __extendsfdf2@plt +; RV64IFZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFZFHMIN-NEXT: addi sp, sp, 16 +; RV64IFZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: fcvt_d_h: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fcvt.d.h fa0, fa0 +; RV32IDZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: fcvt_d_h: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fcvt.d.h fa0, fa0 +; RV64IDZFHMIN-NEXT: ret %1 = fpext half %a to double ret double %1 } @@ -1947,6 +2491,16 @@ ; RV64I-LABEL: bitcast_h_i16: ; RV64I: # %bb.0: ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: bitcast_h_i16: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fmv.h.x fa0, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: bitcast_h_i16: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fmv.h.x fa0, a0 +; CHECK64-IZFHMIN-NEXT: ret %1 = bitcast i16 %a to half ret half %1 } @@ -1974,6 +2528,16 @@ ; RV64I-LABEL: bitcast_i16_h: ; RV64I: # %bb.0: ; 
RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: bitcast_i16_h: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fmv.x.h a0, fa0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: bitcast_i16_h: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fmv.x.h a0, fa0 +; CHECK64-IZFHMIN-NEXT: ret %1 = bitcast half %a to i16 ret i16 %1 } @@ -2045,6 +2609,22 @@ ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_w_demanded_bits: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi a0, a0, 1 +; CHECK32-IZFHMIN-NEXT: fcvt.s.w ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK32-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_w_demanded_bits: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: addiw a0, a0, 1 +; CHECK64-IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK64-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK64-IZFHMIN-NEXT: ret %3 = add i32 %0, 1 %4 = sitofp i32 %3 to half store half %4, half* %1, align 2 @@ -2118,6 +2698,24 @@ ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_h_wu_demanded_bits: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: addi a0, a0, 1 +; CHECK32-IZFHMIN-NEXT: fcvt.s.wu ft0, a0 +; CHECK32-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK32-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_h_wu_demanded_bits: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: addiw a0, a0, 1 +; CHECK64-IZFHMIN-NEXT: slli a2, a0, 32 +; CHECK64-IZFHMIN-NEXT: srli a2, a2, 32 +; CHECK64-IZFHMIN-NEXT: fcvt.s.lu ft0, a2 +; CHECK64-IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECK64-IZFHMIN-NEXT: fsh ft0, 0(a1) +; CHECK64-IZFHMIN-NEXT: ret %3 = add i32 %0, 1 %4 = uitofp i32 %3 to half store half %4, half* %1, align 2 @@ -2168,6 +2766,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_s_i16: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_s_i16: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptosi half %a to i16 ret i16 %1 } @@ -2326,6 +2936,38 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_s_sat_i16: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) +; CHECK32-IZFHMIN-NEXT: flw ft0, %lo(.LCPI32_0)(a0) +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_1) +; CHECK32-IZFHMIN-NEXT: flw ft1, %lo(.LCPI32_1)(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK32-IZFHMIN-NEXT: fmax.s ft0, ft2, ft0 +; CHECK32-IZFHMIN-NEXT: fmin.s ft0, ft0, ft1 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: feq.s a1, ft2, ft2 +; CHECK32-IZFHMIN-NEXT: seqz a1, a1 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK32-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_s_sat_i16: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) +; CHECK64-IZFHMIN-NEXT: flw ft0, %lo(.LCPI32_0)(a0) +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_1) +; CHECK64-IZFHMIN-NEXT: flw ft1, %lo(.LCPI32_1)(a0) +; 
CHECK64-IZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK64-IZFHMIN-NEXT: fmax.s ft0, ft2, ft0 +; CHECK64-IZFHMIN-NEXT: fmin.s ft0, ft0, ft1 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft2, ft2 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a) ret i16 %0 @@ -2375,6 +3017,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_s_i16: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_s_i16: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptoui half %a to i16 ret i16 %1 } @@ -2505,6 +3159,28 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_s_sat_i16: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK32-IZFHMIN-NEXT: flw ft0, %lo(.LCPI34_0)(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK32-IZFHMIN-NEXT: fmv.w.x ft2, zero +; CHECK32-IZFHMIN-NEXT: fmax.s ft1, ft1, ft2 +; CHECK32-IZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_s_sat_i16: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK64-IZFHMIN-NEXT: flw ft0, %lo(.LCPI34_0)(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK64-IZFHMIN-NEXT: fmv.w.x ft2, zero +; CHECK64-IZFHMIN-NEXT: fmax.s ft1, ft1, ft2 +; CHECK64-IZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i16 @llvm.fptoui.sat.i16.f16(half %a) ret i16 %0 @@ -2554,6 +3230,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_s_i8: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_s_i8: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptosi half %a to i8 ret i8 %1 } @@ -2708,6 +3396,38 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_s_sat_i8: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; CHECK32-IZFHMIN-NEXT: flw ft0, %lo(.LCPI36_0)(a0) +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI36_1) +; CHECK32-IZFHMIN-NEXT: flw ft1, %lo(.LCPI36_1)(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK32-IZFHMIN-NEXT: fmax.s ft0, ft2, ft0 +; CHECK32-IZFHMIN-NEXT: fmin.s ft0, ft0, ft1 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: feq.s a1, ft2, ft2 +; CHECK32-IZFHMIN-NEXT: seqz a1, a1 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK32-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_s_sat_i8: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; CHECK64-IZFHMIN-NEXT: flw ft0, 
%lo(.LCPI36_0)(a0) +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI36_1) +; CHECK64-IZFHMIN-NEXT: flw ft1, %lo(.LCPI36_1)(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECK64-IZFHMIN-NEXT: fmax.s ft0, ft2, ft0 +; CHECK64-IZFHMIN-NEXT: fmin.s ft0, ft0, ft1 +; CHECK64-IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft2, ft2 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i8 @llvm.fptosi.sat.i8.f16(half %a) ret i8 %0 @@ -2758,6 +3478,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_s_i8: +; CHECK32-IZFHMIN: # %bb.0: +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_s_i8: +; CHECK64-IZFHMIN: # %bb.0: +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret %1 = fptoui half %a to i8 ret i8 %1 } @@ -2880,6 +3612,28 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_s_sat_i8: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; CHECK32-IZFHMIN-NEXT: flw ft0, %lo(.LCPI38_0)(a0) +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK32-IZFHMIN-NEXT: fmv.w.x ft2, zero +; CHECK32-IZFHMIN-NEXT: fmax.s ft1, ft1, ft2 +; CHECK32-IZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_s_sat_i8: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; CHECK64-IZFHMIN-NEXT: flw ft0, %lo(.LCPI38_0)(a0) +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECK64-IZFHMIN-NEXT: fmv.w.x ft2, zero +; CHECK64-IZFHMIN-NEXT: fmax.s ft1, ft1, ft2 +; CHECK64-IZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i8 @llvm.fptoui.sat.i8.f16(half %a) ret i8 %0 @@ -2998,6 +3752,28 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_wu_h_sat_zext: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK32-IZFHMIN-NEXT: seqz a1, a1 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK32-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_wu_h_sat_zext: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 +; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i32 @llvm.fptoui.sat.i32.f16(half %a) ret i32 %0 @@ -3121,6 +3897,26 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECK32-IZFHMIN-LABEL: fcvt_w_h_sat_sext: +; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK32-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK32-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK32-IZFHMIN-NEXT: 
seqz a1, a1 +; CHECK32-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK32-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK32-IZFHMIN-NEXT: ret +; +; CHECK64-IZFHMIN-LABEL: fcvt_w_h_sat_sext: +; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECK64-IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECK64-IZFHMIN-NEXT: seqz a1, a1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: and a0, a1, a0 +; CHECK64-IZFHMIN-NEXT: ret start: %0 = tail call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %0 diff --git a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll --- a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll @@ -3,12 +3,23 @@ ; RUN: -target-abi ilp32f -disable-strictnode-mutation < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f -disable-strictnode-mutation < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=CHECKIZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=CHECKIZFHMIN %s define i32 @fcmp_oeq(half %a, half %b) nounwind strictfp { ; CHECK-LABEL: fcmp_oeq: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.h a0, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_oeq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: feq.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -23,6 +34,14 @@ ; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ogt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a1 +; CHECKIZFHMIN-NEXT: flt.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: fsflags a1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -36,6 +55,14 @@ ; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_oge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a1 +; CHECKIZFHMIN-NEXT: fle.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: fsflags a1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -49,6 +76,14 @@ ; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: feq.h zero, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_olt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a1 +; CHECKIZFHMIN-NEXT: flt.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: fsflags a1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -62,6 +97,14 @@ ; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: feq.h zero, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ole: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a1 +; CHECKIZFHMIN-NEXT: fle.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: fsflags a1 +; 
CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -82,6 +125,19 @@ ; CHECK-NEXT: or a0, a2, a1 ; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_one: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: flt.h a1, fa0, fa1 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: flt.h a2, fa1, fa0 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: or a0, a2, a1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -94,6 +150,13 @@ ; CHECK-NEXT: feq.h a1, fa0, fa0 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ord: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: feq.h a0, fa1, fa1 +; CHECKIZFHMIN-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ord", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -115,6 +178,20 @@ ; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ueq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: flt.h a1, fa0, fa1 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: flt.h a2, fa1, fa0 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: or a1, a2, a1 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -129,6 +206,15 @@ ; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ugt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: fle.h a1, fa0, fa1 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -143,6 +229,15 @@ ; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_uge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: flt.h a1, fa0, fa1 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -157,6 +252,15 @@ ; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ult: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: fle.h a1, fa1, fa0 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: 
ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -171,6 +275,15 @@ ; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ule: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: frflags a0 +; CHECKIZFHMIN-NEXT: flt.h a1, fa1, fa0 +; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 +; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -182,6 +295,12 @@ ; CHECK-NEXT: feq.h a0, fa0, fa1 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_une: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: feq.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -195,6 +314,14 @@ ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_uno: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: feq.h a0, fa1, fa1 +; CHECKIZFHMIN-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uno", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -207,6 +334,13 @@ ; CHECK-NEXT: fle.h a1, fa0, fa1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_oeq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: fle.h a1, fa0, fa1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -218,6 +352,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: flt.h a0, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_ogt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: flt.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -228,6 +367,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fle.h a0, fa1, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_oge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -238,6 +382,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: flt.h a0, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_olt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: flt.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -248,6 +397,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fle.h a0, fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_ole: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", 
metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -260,6 +414,13 @@ ; CHECK-NEXT: flt.h a1, fa1, fa0 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_one: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: flt.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: flt.h a1, fa1, fa0 +; CHECKIZFHMIN-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -272,6 +433,13 @@ ; CHECK-NEXT: fle.h a1, fa0, fa0 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_ord: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa1, fa1 +; CHECKIZFHMIN-NEXT: fle.h a1, fa0, fa0 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ord", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -285,6 +453,14 @@ ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_ueq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: flt.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: flt.h a1, fa1, fa0 +; CHECKIZFHMIN-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -296,6 +472,12 @@ ; CHECK-NEXT: fle.h a0, fa0, fa1 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_ugt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -307,6 +489,12 @@ ; CHECK-NEXT: flt.h a0, fa0, fa1 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_uge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: flt.h a0, fa0, fa1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -318,6 +506,12 @@ ; CHECK-NEXT: fle.h a0, fa1, fa0 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_ult: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -329,6 +523,12 @@ ; CHECK-NEXT: flt.h a0, fa1, fa0 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_ule: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: flt.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -342,6 +542,14 @@ ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_une: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa1, fa0 +; CHECKIZFHMIN-NEXT: fle.h a1, fa0, fa1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; 
CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 @@ -355,6 +563,14 @@ ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmps_uno: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fle.h a0, fa1, fa1 +; CHECKIZFHMIN-NEXT: fle.h a1, fa0, fa0 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uno", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 ret i32 %2 diff --git a/llvm/test/CodeGen/RISCV/half-fcmp.ll b/llvm/test/CodeGen/RISCV/half-fcmp.ll --- a/llvm/test/CodeGen/RISCV/half-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-fcmp.ll @@ -7,6 +7,14 @@ ; RUN: < %s | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=CHECKIZFHMIN-ILP32F-LP64F %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=CHECKIZFHMIN-ILP32F-LP64F %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=CHECKIZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=CHECKIZFHMIN %s define i32 @fcmp_false(half %a, half %b) nounwind { ; CHECKIZFH-LABEL: fcmp_false: @@ -23,6 +31,16 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: li a0, 0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_false: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: li a0, 0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_false: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: li a0, 0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp false half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -47,6 +65,22 @@ ; RV64I-NEXT: fmv.h.x ft1, a0 ; RV64I-NEXT: feq.h a0, ft1, ft0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_oeq: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: feq.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_oeq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp oeq half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -71,6 +105,22 @@ ; RV64I-NEXT: fmv.h.x ft1, a1 ; RV64I-NEXT: flt.h a0, ft1, ft0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_ogt: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ogt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a1 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ogt half %a, %b %2 = zext i1 
%1 to i32 ret i32 %2 @@ -95,6 +145,22 @@ ; RV64I-NEXT: fmv.h.x ft1, a1 ; RV64I-NEXT: fle.h a0, ft1, ft0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_oge: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_oge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a1 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp oge half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -119,6 +185,22 @@ ; RV64I-NEXT: fmv.h.x ft1, a0 ; RV64I-NEXT: flt.h a0, ft1, ft0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_olt: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_olt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp olt half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -143,6 +225,22 @@ ; RV64I-NEXT: fmv.h.x ft1, a0 ; RV64I-NEXT: fle.h a0, ft1, ft0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_ole: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ole: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ole half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -173,6 +271,26 @@ ; RV64I-NEXT: flt.h a1, ft0, ft1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_one: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a1, ft0, ft1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_one: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: flt.s a1, ft1, ft0 +; CHECKIZFHMIN-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp one half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -203,6 +321,26 @@ ; RV64I-NEXT: feq.h a1, ft0, ft0 ; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_ord: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: feq.s a0, ft0, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: feq.s a1, ft0, ft0 +; 
CHECKIZFHMIN-ILP32F-LP64F-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ord: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: feq.s a0, ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ord half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -236,6 +374,28 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: xori a0, a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_ueq: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a1, ft0, ft1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ueq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: flt.s a1, ft1, ft0 +; CHECKIZFHMIN-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ueq half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -263,6 +423,24 @@ ; RV64I-NEXT: fle.h a0, ft1, ft0 ; RV64I-NEXT: xori a0, a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_ugt: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ugt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ugt half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -290,6 +468,24 @@ ; RV64I-NEXT: flt.h a0, ft1, ft0 ; RV64I-NEXT: xori a0, a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_uge: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_uge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp uge half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -317,6 +513,24 @@ ; RV64I-NEXT: fle.h a0, ft1, ft0 ; RV64I-NEXT: xori a0, a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_ult: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: xori a0, a0, 1 +; 
CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ult: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a1 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ult half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -344,6 +558,24 @@ ; RV64I-NEXT: flt.h a0, ft1, ft0 ; RV64I-NEXT: xori a0, a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_ule: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_ule: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a1 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ule half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -371,6 +603,24 @@ ; RV64I-NEXT: feq.h a0, ft1, ft0 ; RV64I-NEXT: xori a0, a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_une: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: feq.s a0, ft1, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_une: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp une half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -404,6 +654,28 @@ ; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: xori a0, a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_uno: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: feq.s a0, ft0, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_uno: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.h.x ft0, a0 +; CHECKIZFHMIN-NEXT: fmv.h.x ft1, a1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: feq.s a0, ft1, ft1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp uno half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -424,6 +696,16 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-ILP32F-LP64F-LABEL: fcmp_true: +; CHECKIZFHMIN-ILP32F-LP64F: # %bb.0: +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: li a0, 1 +; CHECKIZFHMIN-ILP32F-LP64F-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fcmp_true: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: li a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp true half %a, %b %2 = zext i1 %1 to i32 ret i32 %2 diff --git a/llvm/test/CodeGen/RISCV/half-frem.ll 
b/llvm/test/CodeGen/RISCV/half-frem.ll --- a/llvm/test/CodeGen/RISCV/half-frem.ll +++ b/llvm/test/CodeGen/RISCV/half-frem.ll @@ -5,6 +5,12 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s \ ; RUN: | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s \ +; RUN: | FileCheck -check-prefix=RV32IZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s \ +; RUN: | FileCheck -check-prefix=RV64IZFHMIN %s define half @frem_f16(half %a, half %b) nounwind { ; RV32IZFH-LABEL: frem_f16: @@ -30,6 +36,30 @@ ; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFH-NEXT: addi sp, sp, 16 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: frem_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: fcvt.s.h fa1, fa1 +; RV32IZFHMIN-NEXT: call fmodf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: frem_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: fcvt.s.h fa1, fa1 +; RV64IZFHMIN-NEXT: call fmodf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = frem half %a, %b ret half %1 } diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll --- a/llvm/test/CodeGen/RISCV/half-imm.ll +++ b/llvm/test/CodeGen/RISCV/half-imm.ll @@ -3,6 +3,10 @@ ; RUN: -target-abi ilp32f < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s ; TODO: constant pool shouldn't be necessary for RV32IZfh and RV64IZfh define half @half_imm() nounwind { @@ -11,6 +15,12 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa0, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: half_imm: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI0_0) +; CHECKIZFHMIN-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; CHECKIZFHMIN-NEXT: ret ret half 3.0 } @@ -21,6 +31,15 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: half_imm_op: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) +; CHECKIZFHMIN-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fadd half %a, 1.0 ret half %1 } diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -18,6 +18,19 @@ ; RUN: -verify-machineinstrs | \ ; RUN: FileCheck -check-prefix=RV64I %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zfhmin \ +; RUN: 
-verify-machineinstrs -target-abi ilp32f | \ +; RUN: FileCheck -check-prefixes=CHECKIZFHMIN,RV32IZFHMIN,RV32IFZFHMIN %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zfhmin \ +; RUN: -verify-machineinstrs -target-abi lp64f | \ +; RUN: FileCheck -check-prefixes=CHECKIZFHMIN,RV64IZFHMIN,RV64IFZFHMIN %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -mattr=+zfhmin -verify-machineinstrs -target-abi ilp32d | \ +; RUN: FileCheck -check-prefixes=CHECKIZFHMIN,RV32IZFHMIN,RV32IDZFHMIN %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+zfhmin -verify-machineinstrs -target-abi lp64d | \ +; RUN: FileCheck -check-prefixes=CHECKIZFHMIN,RV64IZFHMIN,RV64IDZFHMIN %s + declare half @llvm.sqrt.f16(half) define half @sqrt_f16(half %a) nounwind { @@ -51,6 +64,13 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: sqrt_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fsqrt.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.sqrt.f16(half %a) ret half %1 } @@ -114,6 +134,29 @@ ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: powi_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call __powisf2@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: powi_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: sext.w a0, a0 +; RV64IZFHMIN-NEXT: call __powisf2@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.powi.f16.i32(half %a, i32 %b) ret half %1 } @@ -168,6 +211,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: sin_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call sinf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: sin_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call sinf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.sin.f16(half %a) ret half %1 } @@ -222,6 +287,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: cos_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call cosf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 
12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: cos_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call cosf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.cos.f16(half %a) ret half %1 } @@ -377,6 +464,98 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; RV32IFZFHMIN-LABEL: sincos_f16: +; RV32IFZFHMIN: # %bb.0: +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fsw fs1, 4(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IFZFHMIN-NEXT: call sinf@plt +; RV32IFZFHMIN-NEXT: fcvt.h.s fs1, fa0 +; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IFZFHMIN-NEXT: call cosf@plt +; RV32IFZFHMIN-NEXT: fcvt.h.s ft0, fa0 +; RV32IFZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IFZFHMIN-NEXT: fcvt.s.h ft1, fs1 +; RV32IFZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: flw fs1, 4(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 +; RV32IFZFHMIN-NEXT: ret +; +; RV64IFZFHMIN-LABEL: sincos_f16: +; RV64IFZFHMIN: # %bb.0: +; RV64IFZFHMIN-NEXT: addi sp, sp, -16 +; RV64IFZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV64IFZFHMIN-NEXT: fsw fs1, 0(sp) # 4-byte Folded Spill +; RV64IFZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV64IFZFHMIN-NEXT: fmv.s fa0, fs0 +; RV64IFZFHMIN-NEXT: call sinf@plt +; RV64IFZFHMIN-NEXT: fcvt.h.s fs1, fa0 +; RV64IFZFHMIN-NEXT: fmv.s fa0, fs0 +; RV64IFZFHMIN-NEXT: call cosf@plt +; RV64IFZFHMIN-NEXT: fcvt.h.s ft0, fa0 +; RV64IFZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IFZFHMIN-NEXT: fcvt.s.h ft1, fs1 +; RV64IFZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV64IFZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IFZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV64IFZFHMIN-NEXT: flw fs1, 0(sp) # 4-byte Folded Reload +; RV64IFZFHMIN-NEXT: addi sp, sp, 16 +; RV64IFZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: sincos_f16: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: addi sp, sp, -32 +; RV32IDZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32IDZFHMIN-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IDZFHMIN-NEXT: call sinf@plt +; RV32IDZFHMIN-NEXT: fcvt.h.s fs1, fa0 +; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IDZFHMIN-NEXT: call cosf@plt +; RV32IDZFHMIN-NEXT: fcvt.h.s ft0, fa0 +; RV32IDZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IDZFHMIN-NEXT: fcvt.s.h ft1, fs1 +; RV32IDZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV32IDZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IDZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32IDZFHMIN-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; RV32IDZFHMIN-NEXT: addi sp, sp, 32 +; RV32IDZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: sincos_f16: +; 
RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: addi sp, sp, -32 +; RV64IDZFHMIN-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IDZFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV64IDZFHMIN-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; RV64IDZFHMIN-NEXT: fcvt.s.h fs0, fa0 +; RV64IDZFHMIN-NEXT: fmv.s fa0, fs0 +; RV64IDZFHMIN-NEXT: call sinf@plt +; RV64IDZFHMIN-NEXT: fcvt.h.s fs1, fa0 +; RV64IDZFHMIN-NEXT: fmv.s fa0, fs0 +; RV64IDZFHMIN-NEXT: call cosf@plt +; RV64IDZFHMIN-NEXT: fcvt.h.s ft0, fa0 +; RV64IDZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IDZFHMIN-NEXT: fcvt.s.h ft1, fs1 +; RV64IDZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV64IDZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IDZFHMIN-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IDZFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV64IDZFHMIN-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; RV64IDZFHMIN-NEXT: addi sp, sp, 32 +; RV64IDZFHMIN-NEXT: ret %1 = call half @llvm.sin.f16(half %a) %2 = call half @llvm.cos.f16(half %a) %3 = fadd half %1, %2 @@ -461,6 +640,30 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: pow_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: fcvt.s.h fa1, fa1 +; RV32IZFHMIN-NEXT: call powf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: pow_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: fcvt.s.h fa1, fa1 +; RV64IZFHMIN-NEXT: call powf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.pow.f16(half %a, half %b) ret half %1 } @@ -515,6 +718,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: exp_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call expf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: exp_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call expf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.exp.f16(half %a) ret half %1 } @@ -569,6 +794,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: exp2_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call exp2f@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: exp2_f16: +; RV64IZFHMIN: # %bb.0: +; 
RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call exp2f@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.exp2.f16(half %a) ret half %1 } @@ -623,6 +870,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: log_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call logf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: log_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call logf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.log.f16(half %a) ret half %1 } @@ -677,6 +946,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: log10_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call log10f@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: log10_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call log10f@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.log10.f16(half %a) ret half %1 } @@ -731,6 +1022,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: log2_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call log2f@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: log2_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call log2f@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.log2.f16(half %a) ret half %1 } @@ -808,6 +1121,15 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fma_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa2 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft2, fa0 +; CHECKIZFHMIN-NEXT: fmadd.s 
ft0, ft2, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.fma.f16(half %a, half %b, half %c) ret half %1 } @@ -895,6 +1217,18 @@ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fmuladd_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fmul.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa2 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.fmuladd.f16(half %a, half %b, half %c) ret half %1 } @@ -918,6 +1252,13 @@ ; RV64I-NEXT: slli a0, a0, 49 ; RV64I-NEXT: srli a0, a0, 49 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fabs_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.fabs.f16(half %a) ret half %1 } @@ -981,6 +1322,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: minnum_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fmin.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.minnum.f16(half %a, half %b) ret half %1 } @@ -1044,6 +1393,14 @@ ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: maxnum_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fmax.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.maxnum.f16(half %a, half %b) ret half %1 } @@ -1090,6 +1447,36 @@ ; RV64I-NEXT: srli a0, a0, 49 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: copysign_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: fsh fa1, 12(sp) +; RV32IZFHMIN-NEXT: fsh fa0, 8(sp) +; RV32IZFHMIN-NEXT: lbu a0, 13(sp) +; RV32IZFHMIN-NEXT: lbu a1, 9(sp) +; RV32IZFHMIN-NEXT: andi a0, a0, 128 +; RV32IZFHMIN-NEXT: andi a1, a1, 127 +; RV32IZFHMIN-NEXT: or a0, a1, a0 +; RV32IZFHMIN-NEXT: sb a0, 9(sp) +; RV32IZFHMIN-NEXT: flh fa0, 8(sp) +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: copysign_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: fsh fa1, 8(sp) +; RV64IZFHMIN-NEXT: fsh fa0, 0(sp) +; RV64IZFHMIN-NEXT: lbu a0, 9(sp) +; RV64IZFHMIN-NEXT: lbu a1, 1(sp) +; RV64IZFHMIN-NEXT: andi a0, a0, 128 +; RV64IZFHMIN-NEXT: andi a1, a1, 127 +; RV64IZFHMIN-NEXT: or a0, a1, a0 +; RV64IZFHMIN-NEXT: sb a0, 1(sp) +; RV64IZFHMIN-NEXT: flh fa0, 0(sp) +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.copysign.f16(half %a, half %b) ret half %1 } @@ -1136,6 +1523,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: floor_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI17_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; 
CHECKIZFHMIN-NEXT: beqz a0, .LBB17_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB17_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.floor.f16(half %a) ret half %1 } @@ -1182,6 +1585,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: ceil_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI18_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB18_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB18_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.ceil.f16(half %a) ret half %1 } @@ -1228,6 +1647,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: trunc_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB19_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB19_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.trunc.f16(half %a) ret half %1 } @@ -1274,6 +1709,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: rint_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI20_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB20_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0 +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB20_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.rint.f16(half %a) ret half %1 } @@ -1328,6 +1779,28 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: nearbyint_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call nearbyintf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: nearbyint_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call nearbyintf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = call half @llvm.nearbyint.f16(half 
%a) ret half %1 } @@ -1374,6 +1847,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: round_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI22_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI22_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB22_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB22_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.round.f16(half %a) ret half %1 } @@ -1420,6 +1909,22 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; CHECKIZFHMIN-LABEL: roundeven_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI23_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB23_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB23_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = call half @llvm.roundeven.f16(half %a) ret half %1 } diff --git a/llvm/test/CodeGen/RISCV/half-isnan.ll b/llvm/test/CodeGen/RISCV/half-isnan.ll --- a/llvm/test/CodeGen/RISCV/half-isnan.ll +++ b/llvm/test/CodeGen/RISCV/half-isnan.ll @@ -3,6 +3,10 @@ ; RUN: -target-abi ilp32f < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s define zeroext i1 @half_is_nan(half %a) nounwind { ; CHECK-LABEL: half_is_nan: @@ -10,6 +14,13 @@ ; CHECK-NEXT: feq.h a0, fa0, fa0 ; CHECK-NEXT: xori a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: half_is_nan: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft0 +; CHECKIZFHMIN-NEXT: xori a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp uno half %a, 0.000000e+00 ret i1 %1 } @@ -19,6 +30,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: feq.h a0, fa0, fa0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: half_not_nan: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ord half %a, 0.000000e+00 ret i1 %1 } diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll --- a/llvm/test/CodeGen/RISCV/half-mem.ll +++ b/llvm/test/CodeGen/RISCV/half-mem.ll @@ -3,6 +3,10 @@ ; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFH,RV32IZFH %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFH,RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN,RV32IZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; 
RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFHMIN,RV64IZFHMIN %s define half @flh(half *%a) nounwind { ; CHECKIZFH-LABEL: flh: @@ -11,6 +15,16 @@ ; CHECKIZFH-NEXT: flh ft1, 6(a0) ; CHECKIZFH-NEXT: fadd.h fa0, ft0, ft1 ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: flh: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: flh ft0, 6(a0) +; CHECKIZFHMIN-NEXT: flh ft1, 0(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = load half, half* %a %2 = getelementptr half, half* %a, i32 3 %3 = load half, half* %2 @@ -29,6 +43,16 @@ ; CHECKIZFH-NEXT: fsh ft0, 0(a0) ; CHECKIZFH-NEXT: fsh ft0, 16(a0) ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: fsh: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fsh ft0, 0(a0) +; CHECKIZFHMIN-NEXT: fsh ft0, 16(a0) +; CHECKIZFHMIN-NEXT: ret %1 = fadd half %b, %c store half %1, half* %a %2 = getelementptr half, half* %a, i32 8 @@ -52,6 +76,20 @@ ; CHECKIZFH-NEXT: flh ft0, 18(a1) ; CHECKIZFH-NEXT: fsh fa0, 18(a1) ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: flh_fsh_global: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: lui a0, %hi(G) +; CHECKIZFHMIN-NEXT: flh ft0, %lo(G)(a0) +; CHECKIZFHMIN-NEXT: addi a1, a0, %lo(G) +; CHECKIZFHMIN-NEXT: fsh fa0, %lo(G)(a0) +; CHECKIZFHMIN-NEXT: flh ft0, 18(a1) +; CHECKIZFHMIN-NEXT: fsh fa0, 18(a1) +; CHECKIZFHMIN-NEXT: ret %1 = fadd half %a, %b %2 = load volatile half, half* @G store half %1, half* @G @@ -79,6 +117,29 @@ ; RV64IZFH-NEXT: fadd.h fa0, fa0, ft0 ; RV64IZFH-NEXT: fsh fa0, -273(a0) ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: flh_fsh_constant: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, 912092 +; RV32IZFHMIN-NEXT: flh ft0, -273(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: fsh fa0, -273(a0) +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: flh_fsh_constant: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, 228023 +; RV64IZFHMIN-NEXT: slli a0, a0, 2 +; RV64IZFHMIN-NEXT: flh ft0, -273(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: fsh fa0, -273(a0) +; RV64IZFHMIN-NEXT: ret %1 = inttoptr i32 3735928559 to half* %2 = load volatile half, half* %1 %3 = fadd half %a, %2 @@ -118,6 +179,42 @@ ; RV64IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV64IZFH-NEXT: addi sp, sp, 16 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: flh_stack: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fmv.s fs0, fa0 +; RV32IZFHMIN-NEXT: addi a0, sp, 4 +; RV32IZFHMIN-NEXT: call notdead@plt +; RV32IZFHMIN-NEXT: flh ft0, 4(sp) +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fs0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload 
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: flh_stack: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV64IZFHMIN-NEXT: fmv.s fs0, fa0 +; RV64IZFHMIN-NEXT: mv a0, sp +; RV64IZFHMIN-NEXT: call notdead@plt +; RV64IZFHMIN-NEXT: flh ft0, 0(sp) +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fs0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = alloca half, align 4 %2 = bitcast half* %1 to i8* call void @notdead(i8* %2) @@ -150,6 +247,36 @@ ; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFH-NEXT: addi sp, sp, 16 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: fsh_stack: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV32IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV32IZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fsh ft0, 8(sp) +; RV32IZFHMIN-NEXT: addi a0, sp, 8 +; RV32IZFHMIN-NEXT: call notdead@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: fsh_stack: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fsh ft0, 4(sp) +; RV64IZFHMIN-NEXT: addi a0, sp, 4 +; RV64IZFHMIN-NEXT: call notdead@plt +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret %1 = fadd half %a, %b ; force store from FPR16 %2 = alloca half, align 4 store half %1, half* %2 diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -3,6 +3,10 @@ ; RUN: -target-abi=ilp32f | FileCheck -check-prefixes=CHECKIZFH,RV32IZFH %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs < %s \ ; RUN: -target-abi=lp64f | FileCheck -check-prefixes=CHECKIZFH,RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefixes=CHECKIZFHMIN,RV32IZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefixes=CHECKIZFHMIN,RV64IZFHMIN %s define signext i32 @test_floor_si32(half %x) { ; CHECKIZFH-LABEL: test_floor_si32: @@ -13,6 +17,28 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_floor_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI0_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI0_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB0_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, 
rdn +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB0_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-NEXT: seqz a1, a1 +; CHECKIZFHMIN-NEXT: addi a1, a1, -1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %b @@ -76,6 +102,79 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB1_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB1_2: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fle.s s0, ft1, fs0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixsfdi@plt +; RV32IZFHMIN-NEXT: lui a3, 524288 +; RV32IZFHMIN-NEXT: bnez s0, .LBB1_4 +; RV32IZFHMIN-NEXT: # %bb.3: +; RV32IZFHMIN-NEXT: lui a1, 524288 +; RV32IZFHMIN-NEXT: .LBB1_4: +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI1_2) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI1_2)(a2) +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: beqz a2, .LBB1_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a1, a3, -1 +; RV32IZFHMIN-NEXT: .LBB1_6: +; RV32IZFHMIN-NEXT: feq.s a3, fs0, fs0 +; RV32IZFHMIN-NEXT: seqz a3, a3 +; RV32IZFHMIN-NEXT: addi a3, a3, -1 +; RV32IZFHMIN-NEXT: and a1, a3, a1 +; RV32IZFHMIN-NEXT: neg a4, s0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a3, a0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB1_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB1_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) ret i64 %b @@ -90,6 +189,50 @@ ; CHECKIZFH-NEXT: addi a1, a1, 
-1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_ui32: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI2_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI2_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB2_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB2_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV32IZFHMIN-NEXT: seqz a1, a1 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_ui32: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI2_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI2_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB2_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB2_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a0, a1 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) ret i32 %b @@ -140,6 +283,66 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI3_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB3_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB3_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fmv.w.x ft0, zero +; RV32IZFHMIN-NEXT: fle.s a0, ft0, fs0 +; RV32IZFHMIN-NEXT: neg s0, a0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixunssfdi@plt +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI3_1) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI3_1)(a2) +; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI3_0)(a0) +; RV64IZFHMIN-NEXT: 
fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB3_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB3_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) ret i64 %b @@ -154,6 +357,28 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_ceil_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI4_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI4_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB4_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB4_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-NEXT: seqz a1, a1 +; CHECKIZFHMIN-NEXT: addi a1, a1, -1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %b @@ -217,6 +442,79 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB5_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB5_2: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI5_1)(a0) +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fle.s s0, ft1, fs0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixsfdi@plt +; RV32IZFHMIN-NEXT: lui a3, 524288 +; RV32IZFHMIN-NEXT: bnez s0, .LBB5_4 +; RV32IZFHMIN-NEXT: # %bb.3: +; RV32IZFHMIN-NEXT: lui a1, 524288 +; RV32IZFHMIN-NEXT: .LBB5_4: +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI5_2) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI5_2)(a2) +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: beqz a2, .LBB5_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a1, a3, -1 +; RV32IZFHMIN-NEXT: .LBB5_6: +; RV32IZFHMIN-NEXT: feq.s a3, fs0, fs0 +; RV32IZFHMIN-NEXT: seqz a3, a3 +; RV32IZFHMIN-NEXT: addi a3, a3, -1 +; RV32IZFHMIN-NEXT: and a1, a3, a1 +; RV32IZFHMIN-NEXT: neg a4, s0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 
+; RV32IZFHMIN-NEXT: and a0, a3, a0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI5_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB5_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB5_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) ret i64 %b @@ -231,6 +529,50 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_ui32: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI6_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI6_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB6_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB6_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV32IZFHMIN-NEXT: seqz a1, a1 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_ui32: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI6_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI6_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB6_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB6_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a0, a1 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) ret i32 %b @@ -281,6 +623,66 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; 
RV32IZFHMIN-NEXT: beqz a0, .LBB7_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB7_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fmv.w.x ft0, zero +; RV32IZFHMIN-NEXT: fle.s a0, ft0, fs0 +; RV32IZFHMIN-NEXT: neg s0, a0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixunssfdi@plt +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI7_1) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI7_1)(a2) +; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI7_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB7_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB7_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) ret i64 %b @@ -295,6 +697,28 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_trunc_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI8_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI8_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB8_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB8_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-NEXT: seqz a1, a1 +; CHECKIZFHMIN-NEXT: addi a1, a1, -1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %b @@ -358,6 +782,79 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI9_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; 
RV32IZFHMIN-NEXT: beqz a0, .LBB9_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB9_2: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI9_1)(a0) +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fle.s s0, ft1, fs0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixsfdi@plt +; RV32IZFHMIN-NEXT: lui a3, 524288 +; RV32IZFHMIN-NEXT: bnez s0, .LBB9_4 +; RV32IZFHMIN-NEXT: # %bb.3: +; RV32IZFHMIN-NEXT: lui a1, 524288 +; RV32IZFHMIN-NEXT: .LBB9_4: +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI9_2) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI9_2)(a2) +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: beqz a2, .LBB9_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a1, a3, -1 +; RV32IZFHMIN-NEXT: .LBB9_6: +; RV32IZFHMIN-NEXT: feq.s a3, fs0, fs0 +; RV32IZFHMIN-NEXT: seqz a3, a3 +; RV32IZFHMIN-NEXT: addi a3, a3, -1 +; RV32IZFHMIN-NEXT: and a1, a3, a1 +; RV32IZFHMIN-NEXT: neg a4, s0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a3, a0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI9_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI9_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB9_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB9_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) ret i64 %b @@ -372,6 +869,50 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_ui32: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI10_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB10_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB10_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV32IZFHMIN-NEXT: seqz a1, a1 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_ui32: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI10_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: 
flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB10_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB10_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a0, a1 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) ret i32 %b @@ -422,6 +963,66 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB11_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB11_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fmv.w.x ft0, zero +; RV32IZFHMIN-NEXT: fle.s a0, ft0, fs0 +; RV32IZFHMIN-NEXT: neg s0, a0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixunssfdi@plt +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI11_1) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI11_1)(a2) +; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB11_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB11_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) ret i64 %b @@ -436,6 +1037,28 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_round_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI12_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, 
ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB12_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB12_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-NEXT: seqz a1, a1 +; CHECKIZFHMIN-NEXT: addi a1, a1, -1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %b @@ -499,6 +1122,79 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI13_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB13_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB13_2: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI13_1)(a0) +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fle.s s0, ft1, fs0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixsfdi@plt +; RV32IZFHMIN-NEXT: lui a3, 524288 +; RV32IZFHMIN-NEXT: bnez s0, .LBB13_4 +; RV32IZFHMIN-NEXT: # %bb.3: +; RV32IZFHMIN-NEXT: lui a1, 524288 +; RV32IZFHMIN-NEXT: .LBB13_4: +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI13_2) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI13_2)(a2) +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: beqz a2, .LBB13_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a1, a3, -1 +; RV32IZFHMIN-NEXT: .LBB13_6: +; RV32IZFHMIN-NEXT: feq.s a3, fs0, fs0 +; RV32IZFHMIN-NEXT: seqz a3, a3 +; RV32IZFHMIN-NEXT: addi a3, a3, -1 +; RV32IZFHMIN-NEXT: and a1, a3, a1 +; RV32IZFHMIN-NEXT: neg a4, s0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a3, a0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI13_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI13_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB13_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB13_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call 
half @llvm.round.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) ret i64 %b @@ -513,6 +1209,50 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_ui32: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI14_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI14_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB14_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB14_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV32IZFHMIN-NEXT: seqz a1, a1 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_ui32: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI14_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI14_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB14_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB14_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a0, a1 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) ret i32 %b @@ -563,6 +1303,66 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI15_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB15_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB15_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fmv.w.x ft0, zero +; RV32IZFHMIN-NEXT: fle.s a0, ft0, fs0 +; RV32IZFHMIN-NEXT: neg s0, a0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixunssfdi@plt +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI15_1) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI15_1)(a2) +; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: 
test_round_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI15_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI15_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB15_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB15_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) ret i64 %b @@ -577,6 +1377,28 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_roundeven_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI16_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI16_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB16_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB16_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: feq.s a1, ft0, ft0 +; CHECKIZFHMIN-NEXT: seqz a1, a1 +; CHECKIZFHMIN-NEXT: addi a1, a1, -1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %b @@ -640,6 +1462,79 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI17_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB17_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB17_2: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI17_1)(a0) +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fle.s s0, ft1, fs0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixsfdi@plt +; RV32IZFHMIN-NEXT: lui a3, 524288 +; RV32IZFHMIN-NEXT: bnez s0, .LBB17_4 +; RV32IZFHMIN-NEXT: # %bb.3: +; RV32IZFHMIN-NEXT: lui a1, 524288 +; RV32IZFHMIN-NEXT: .LBB17_4: +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI17_2) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI17_2)(a2) +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: beqz a2, .LBB17_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a1, a3, -1 +; RV32IZFHMIN-NEXT: .LBB17_6: +; RV32IZFHMIN-NEXT: feq.s a3, fs0, fs0 +; RV32IZFHMIN-NEXT: seqz a3, a3 +; 
RV32IZFHMIN-NEXT: addi a3, a3, -1 +; RV32IZFHMIN-NEXT: and a1, a3, a1 +; RV32IZFHMIN-NEXT: neg a4, s0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a3, a0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI17_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB17_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB17_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) ret i64 %b @@ -654,6 +1549,50 @@ ; CHECKIZFH-NEXT: addi a1, a1, -1 ; CHECKIZFH-NEXT: and a0, a1, a0 ; CHECKIZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_ui32: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI18_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB18_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB18_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV32IZFHMIN-NEXT: seqz a1, a1 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_ui32: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI18_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB18_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB18_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a0, a1 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) ret i32 %b @@ -704,6 +1643,66 @@ ; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a1, a0 ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw 
fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB19_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB19_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFHMIN-NEXT: fmv.w.x ft0, zero +; RV32IZFHMIN-NEXT: fle.s a0, ft0, fs0 +; RV32IZFHMIN-NEXT: neg s0, a0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixunssfdi@plt +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI19_1) +; RV32IZFHMIN-NEXT: flw ft0, %lo(.LCPI19_1)(a2) +; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: flt.s a2, ft0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB19_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB19_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: feq.s a1, ft0, ft0 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) ret i64 %b diff --git a/llvm/test/CodeGen/RISCV/half-round-conv.ll b/llvm/test/CodeGen/RISCV/half-round-conv.ll --- a/llvm/test/CodeGen/RISCV/half-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv.ll @@ -3,6 +3,10 @@ ; RUN: -target-abi=ilp32f | FileCheck -check-prefixes=CHECKIZFH,RV32IZFH %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs < %s \ ; RUN: -target-abi=lp64f | FileCheck -check-prefixes=CHECKIZFH,RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefixes=CHECKIZFHMIN,RV32IZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefixes=CHECKIZFHMIN,RV64IZFHMIN %s define signext i8 @test_floor_si8(half %x) { ; RV32IZFH-LABEL: test_floor_si8: @@ -14,6 +18,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rdn ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_si8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI0_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI0_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB0_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; 
RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB0_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_si8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI0_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI0_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB0_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB0_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptosi half %a to i8 ret i8 %b @@ -29,6 +69,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rdn ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_si16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB1_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB1_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_si16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB1_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB1_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptosi half %a to i16 ret i16 %b @@ -39,6 +115,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rdn ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_floor_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI2_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI2_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB2_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB2_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptosi half %a to i32 ret i32 %b @@ -70,6 +164,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rdn ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte 
Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI3_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB3_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB3_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixhfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI3_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB3_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB3_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptosi half %a to i64 ret i64 %b @@ -85,6 +220,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rdn ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_ui8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI4_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI4_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB4_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB4_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_ui8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI4_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI4_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB4_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB4_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptoui half %a to i8 ret i8 %b @@ -100,6 +271,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rdn ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_ui16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB5_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB5_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: 
fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_ui16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI5_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB5_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB5_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptoui half %a to i16 ret i16 %b @@ -110,6 +317,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rdn ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_floor_ui32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI6_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI6_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB6_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB6_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptoui half %a to i32 ret i32 %b @@ -141,6 +366,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rdn ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_floor_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB7_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB7_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixunshfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_floor_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI7_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB7_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB7_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) %b = fptoui half %a to i64 ret i64 %b @@ -156,6 +422,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rup ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_si8: +; RV32IZFHMIN: # %bb.0: +; 
RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI8_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI8_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB8_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB8_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_si8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI8_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI8_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB8_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB8_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = fptosi half %a to i8 ret i8 %b @@ -171,6 +473,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rup ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_si16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI9_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB9_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB9_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_si16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI9_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI9_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB9_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB9_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = fptosi half %a to i16 ret i16 %b @@ -181,6 +519,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rup ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_ceil_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI10_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB10_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB10_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half 
@llvm.ceil.f16(half %x) %b = fptosi half %a to i32 ret i32 %b @@ -212,6 +568,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rup ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB11_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB11_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixhfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB11_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB11_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = fptosi half %a to i64 ret i64 %b @@ -227,6 +624,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rup ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_ui8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI12_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB12_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB12_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_ui8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI12_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB12_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB12_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = fptoui half %a to i8 ret i8 %b @@ -242,6 +675,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rup ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_ui16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI13_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; 
RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB13_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB13_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_ui16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI13_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI13_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB13_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB13_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = fptoui half %a to i16 ret i16 %b @@ -252,6 +721,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rup ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_ceil_ui32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI14_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI14_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB14_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB14_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = fptoui half %a to i32 ret i32 %b @@ -283,6 +770,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rup ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_ceil_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI15_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB15_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB15_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixunshfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_ceil_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI15_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI15_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB15_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB15_2: +; RV64IZFHMIN-NEXT: fcvt.h.s 
ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) %b = fptoui half %a to i64 ret i64 %b @@ -298,6 +826,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_si8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI16_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI16_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB16_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB16_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_si8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI16_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI16_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB16_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB16_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptosi half %a to i8 ret i8 %b @@ -313,6 +877,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_si16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI17_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB17_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB17_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_si16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI17_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB17_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB17_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptosi half %a to i16 ret i16 %b @@ -323,6 +923,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_trunc_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI18_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI18_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; 
CHECKIZFHMIN-NEXT: beqz a0, .LBB18_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB18_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptosi half %a to i32 ret i32 %b @@ -354,6 +972,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB19_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB19_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixhfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB19_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB19_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptosi half %a to i64 ret i64 %b @@ -369,6 +1028,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_ui8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI20_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB20_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB20_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_ui8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI20_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI20_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB20_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB20_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz 
+; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptoui half %a to i8 ret i8 %b @@ -384,6 +1079,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_ui16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI21_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI21_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB21_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB21_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_ui16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI21_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI21_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB21_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB21_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptoui half %a to i16 ret i16 %b @@ -394,6 +1125,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_trunc_ui32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI22_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI22_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB22_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB22_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptoui half %a to i32 ret i32 %b @@ -425,6 +1174,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_trunc_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI23_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB23_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB23_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixunshfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_trunc_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) +; 
RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI23_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB23_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB23_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) %b = fptoui half %a to i64 ret i64 %b @@ -440,6 +1230,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_si8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI24_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI24_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB24_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB24_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_si8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI24_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI24_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB24_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB24_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptosi half %a to i8 ret i8 %b @@ -455,6 +1281,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_si16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI25_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI25_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB25_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB25_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_si16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI25_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI25_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB25_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB25_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptosi 
half %a to i16 ret i16 %b @@ -465,6 +1327,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rmm ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_round_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI26_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI26_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB26_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB26_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptosi half %a to i32 ret i32 %b @@ -496,6 +1376,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI27_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI27_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB27_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB27_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixhfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI27_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI27_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB27_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB27_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptosi half %a to i64 ret i64 %b @@ -511,6 +1432,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rmm ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_ui8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI28_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI28_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB28_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB28_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_ui8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI28_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI28_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; 
RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB28_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB28_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptoui half %a to i8 ret i8 %b @@ -526,6 +1483,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rmm ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_ui16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI29_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI29_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB29_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB29_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_ui16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI29_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI29_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB29_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB29_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptoui half %a to i16 ret i16 %b @@ -536,6 +1529,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rmm ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_round_ui32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI30_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI30_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB30_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB30_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptoui half %a to i32 ret i32 %b @@ -567,6 +1578,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rmm ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_round_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI31_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI31_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB31_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; 
RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB31_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixunshfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_round_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI31_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI31_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB31_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB31_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) %b = fptoui half %a to i64 ret i64 %b @@ -582,6 +1634,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rne ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_si8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI32_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB32_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB32_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_si8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI32_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB32_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB32_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptosi half %a to i8 ret i8 %b @@ -597,6 +1685,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rne ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_si16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI33_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI33_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB33_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB33_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_si16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI33_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI33_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; 
RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB33_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB33_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptosi half %a to i16 ret i16 %b @@ -607,6 +1731,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rne ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_roundeven_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI34_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI34_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB34_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB34_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptosi half %a to i32 ret i32 %b @@ -638,6 +1780,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rne ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI35_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI35_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB35_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB35_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixhfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI35_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI35_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB35_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB35_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptosi half %a to i64 ret i64 %b @@ -653,6 +1836,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rne ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_ui8: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI36_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB36_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: 
fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB36_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_ui8: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI36_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI36_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB36_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB36_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptoui half %a to i8 ret i8 %b @@ -668,6 +1887,42 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rne ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_ui16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI37_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI37_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB37_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB37_2: +; RV32IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_ui16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI37_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI37_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB37_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB37_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptoui half %a to i16 ret i16 %b @@ -678,6 +1933,24 @@ ; CHECKIZFH: # %bb.0: ; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0, rne ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_roundeven_ui32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI38_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI38_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB38_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB38_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptoui half %a to i32 ret i32 %b @@ -709,6 +1982,47 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rne ; RV64IZFH-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_roundeven_ui64: +; RV32IZFHMIN: # 
%bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: .cfi_offset ra, -4 +; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI39_0) +; RV32IZFHMIN-NEXT: flw ft1, %lo(.LCPI39_0)(a0) +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV32IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV32IZFHMIN-NEXT: beqz a0, .LBB39_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV32IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV32IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFHMIN-NEXT: .LBB39_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: call __fixunshfdi@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_roundeven_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI39_0) +; RV64IZFHMIN-NEXT: flw ft1, %lo(.LCPI39_0)(a0) +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fabs.s ft2, ft0 +; RV64IZFHMIN-NEXT: flt.s a0, ft2, ft1 +; RV64IZFHMIN-NEXT: beqz a0, .LBB39_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; RV64IZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; RV64IZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: .LBB39_2: +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) %b = fptoui half %a to i64 ret i64 %b @@ -749,6 +2063,22 @@ ; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 ; CHECKIZFH-NEXT: .LBB40_2: ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_floor_half: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI40_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI40_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB40_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rdn +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rdn +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB40_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.floor.f16(half %x) ret half %a } @@ -788,6 +2118,22 @@ ; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 ; CHECKIZFH-NEXT: .LBB41_2: ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_ceil_half: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI41_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI41_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB41_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rup +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rup +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB41_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) ret half %a } @@ -827,6 +2173,22 @@ ; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 ; CHECKIZFH-NEXT: .LBB42_2: ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_trunc_half: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI42_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI42_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB42_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: 
fcvt.w.s a0, ft0, rtz +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rtz +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB42_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) ret half %a } @@ -866,6 +2228,22 @@ ; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 ; CHECKIZFH-NEXT: .LBB43_2: ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_round_half: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI43_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI43_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB43_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rmm +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB43_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) ret half %a } @@ -905,6 +2283,22 @@ ; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 ; CHECKIZFH-NEXT: .LBB44_2: ; CHECKIZFH-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_roundeven_half: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: lui a0, %hi(.LCPI44_0) +; CHECKIZFHMIN-NEXT: flw ft1, %lo(.LCPI44_0)(a0) +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fabs.s ft2, ft0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft2, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB44_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, ft0, rne +; CHECKIZFHMIN-NEXT: fcvt.s.w ft1, a0, rne +; CHECKIZFHMIN-NEXT: fsgnj.s ft0, ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB44_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) ret half %a } diff --git a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll @@ -3,12 +3,21 @@ ; RUN: -target-abi ilp32f < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefix=CHECKIZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefix=CHECKIZFHMIN %s define half @select_fcmp_false(half %a, half %b) nounwind { ; CHECK-LABEL: select_fcmp_false: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_false: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fmv.s fa0, fa1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp false half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -23,6 +32,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_oeq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: bnez a0, .LBB1_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB1_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp oeq half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -37,6 +58,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_ogt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 
+; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-NEXT: bnez a0, .LBB2_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB2_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ogt half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -51,6 +84,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB3_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_oge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-NEXT: bnez a0, .LBB3_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB3_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp oge half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -65,6 +110,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_olt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: bnez a0, .LBB4_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB4_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp olt half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -79,6 +136,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB5_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_ole: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: bnez a0, .LBB5_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB5_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ole half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -95,6 +164,20 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB6_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_one: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: flt.s a1, ft1, ft0 +; CHECKIZFHMIN-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-NEXT: bnez a0, .LBB6_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB6_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp one half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -111,6 +194,20 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_ord: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: feq.s a1, ft1, ft1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: bnez a0, .LBB7_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB7_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ord half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -127,6 +224,20 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_ueq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; 
CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: flt.s a1, ft1, ft0 +; CHECKIZFHMIN-NEXT: or a0, a1, a0 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB8_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB8_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ueq half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -141,6 +252,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_ugt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB9_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB9_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ugt half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -155,6 +278,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB10_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_uge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: flt.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB10_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB10_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp uge half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -169,6 +304,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB11_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_ult: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB11_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB11_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ult half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -183,6 +330,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_ule: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: flt.s a0, ft1, ft0 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB12_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB12_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ule half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -197,6 +356,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB13_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_une: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft1 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB13_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft0, ft1 +; CHECKIZFHMIN-NEXT: .LBB13_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp une half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -213,6 +384,20 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB14_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_uno: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: feq.s a0, ft0, ft0 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: feq.s a1, ft1, ft1 +; CHECKIZFHMIN-NEXT: and a0, 
a1, a0 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB14_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fmv.s ft1, ft0 +; CHECKIZFHMIN-NEXT: .LBB14_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp uno half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -222,6 +407,10 @@ ; CHECK-LABEL: select_fcmp_true: ; CHECK: # %bb.0: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_true: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: ret %1 = fcmp true half %a, %b %2 = select i1 %1, half %a, half %b ret half %2 @@ -237,6 +426,17 @@ ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB16_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: i32_select_fcmp_oeq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: feq.s a2, ft1, ft0 +; CHECKIZFHMIN-NEXT: bnez a2, .LBB16_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: mv a0, a1 +; CHECKIZFHMIN-NEXT: .LBB16_2: +; CHECKIZFHMIN-NEXT: ret %1 = fcmp oeq half %a, %b %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 @@ -249,6 +449,15 @@ ; CHECK-NEXT: li a1, 2 ; CHECK-NEXT: sub a0, a1, a0 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_oeq_1_2: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: feq.s a0, ft1, ft0 +; CHECKIZFHMIN-NEXT: li a1, 2 +; CHECKIZFHMIN-NEXT: sub a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp fast oeq half %a, %b %2 = select i1 %1, i32 1, i32 2 ret i32 %2 @@ -260,6 +469,14 @@ ; CHECK-NEXT: fle.h a0, fa0, fa1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_uge_negone_zero: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-NEXT: addi a0, a0, -1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ugt half %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 @@ -271,6 +488,14 @@ ; CHECK-NEXT: fle.h a0, fa0, fa1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_fcmp_uge_1_2: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; CHECKIZFHMIN-NEXT: fle.s a0, ft1, ft0 +; CHECKIZFHMIN-NEXT: addi a0, a0, 1 +; CHECKIZFHMIN-NEXT: ret %1 = fcmp ugt half %a, %b %2 = select i1 %1, i32 1, i32 2 ret i32 %2 diff --git a/llvm/test/CodeGen/RISCV/half-select-icmp.ll b/llvm/test/CodeGen/RISCV/half-select-icmp.ll --- a/llvm/test/CodeGen/RISCV/half-select-icmp.ll +++ b/llvm/test/CodeGen/RISCV/half-select-icmp.ll @@ -3,6 +3,10 @@ ; RUN: -target-abi ilp32f < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefix=CHECKIZFHMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefix=CHECKIZFHMIN %s define half @select_icmp_eq(i32 signext %a, i32 signext %b, half %c, half %d) { ; CHECK-LABEL: select_icmp_eq: @@ -12,6 +16,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_eq: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: beq a0, a1, .LBB0_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB0_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 
+; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp eq i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -25,6 +41,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_ne: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: bne a0, a1, .LBB1_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB1_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp ne i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -38,6 +66,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_ugt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: bltu a1, a0, .LBB2_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB2_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp ugt i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -51,6 +91,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB3_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_uge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: bgeu a0, a1, .LBB3_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB3_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp uge i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -64,6 +116,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_ult: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: bltu a0, a1, .LBB4_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB4_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp ult i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -77,6 +141,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB5_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_ule: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: bgeu a1, a0, .LBB5_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB5_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp ule i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -90,6 +166,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB6_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_sgt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: blt a1, a0, .LBB6_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB6_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp sgt i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -103,6 +191,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: 
select_icmp_sge: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: bge a0, a1, .LBB7_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB7_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp sge i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -116,6 +216,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_slt: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: blt a0, a1, .LBB8_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB8_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp slt i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 @@ -129,6 +241,18 @@ ; CHECK-NEXT: fmv.h fa0, fa1 ; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: ret +; +; CHECKIZFHMIN-LABEL: select_icmp_sle: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: bge a1, a0, .LBB9_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret +; CHECKIZFHMIN-NEXT: .LBB9_2: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; CHECKIZFHMIN-NEXT: ret %1 = icmp sle i32 %a, %b %2 = select i1 %1, half %c, half %d ret half %2 diff --git a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert-strict.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s | \ +; RUN: FileCheck %s -check-prefix=RV64IZFHMIN + +; This file exhaustively checks half<->i32 conversions. 
+ +define i32 @aext_fptosi(half %a) nounwind { +; RV64IZFHMIN-LABEL: aext_fptosi: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) + +define signext i32 @sext_fptosi(half %a) nounwind { +; RV64IZFHMIN-LABEL: sext_fptosi: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(half %a) nounwind { +; RV64IZFHMIN-LABEL: zext_fptosi: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: slli a0, a0, 32 +; RV64IZFHMIN-NEXT: srli a0, a0, 32 +; RV64IZFHMIN-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(half %a) nounwind { +; RV64IZFHMIN-LABEL: aext_fptoui: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) + +define signext i32 @sext_fptoui(half %a) nounwind { +; RV64IZFHMIN-LABEL: sext_fptoui: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(half %a) nounwind { +; RV64IZFHMIN-LABEL: zext_fptoui: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define half @uitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFHMIN-LABEL: uitofp_aext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: slli a0, a0, 32 +; RV64IZFHMIN-NEXT: srli a0, a0, 32 +; RV64IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata, metadata) + +define half @uitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFHMIN-LABEL: uitofp_sext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: slli a0, a0, 32 +; RV64IZFHMIN-NEXT: srli a0, a0, 32 +; RV64IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @uitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFHMIN-LABEL: uitofp_zext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = 
call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFHMIN-LABEL: sitofp_aext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: sext.w a0, a0 +; RV64IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata, metadata) + +define half @sitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFHMIN-LABEL: sitofp_sext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFHMIN-LABEL: sitofp_zext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: sext.w a0, a0 +; RV64IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck %s -check-prefix=RV64IZFHMIN + +; This file exhaustively checks half<->i32 conversions. 
+ +define i32 @aext_fptosi(half %a) nounwind { +; RV64IZFHMIN-LABEL: aext_fptosi: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = fptosi half %a to i32 + ret i32 %1 +} + +define signext i32 @sext_fptosi(half %a) nounwind { +; RV64IZFHMIN-LABEL: sext_fptosi: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = fptosi half %a to i32 + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(half %a) nounwind { +; RV64IZFHMIN-LABEL: zext_fptosi: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: slli a0, a0, 32 +; RV64IZFHMIN-NEXT: srli a0, a0, 32 +; RV64IZFHMIN-NEXT: ret + %1 = fptosi half %a to i32 + ret i32 %1 +} + +define i32 @aext_fptoui(half %a) nounwind { +; RV64IZFHMIN-LABEL: aext_fptoui: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = fptoui half %a to i32 + ret i32 %1 +} + +define signext i32 @sext_fptoui(half %a) nounwind { +; RV64IZFHMIN-LABEL: sext_fptoui: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = fptoui half %a to i32 + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(half %a) nounwind { +; RV64IZFHMIN-LABEL: zext_fptoui: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IZFHMIN-NEXT: ret + %1 = fptoui half %a to i32 + ret i32 %1 +} + +define i16 @bcvt_f16_to_aext_i16(half %a, half %b) nounwind { +; RV64IZFHMIN-LABEL: bcvt_f16_to_aext_i16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fmv.x.h a0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = fadd half %a, %b + %2 = bitcast half %1 to i16 + ret i16 %2 +} + +define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind { +; RV64IZFHMIN-LABEL: bcvt_f16_to_sext_i16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fmv.x.h a0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = fadd half %a, %b + %2 = bitcast half %1 to i16 + ret i16 %2 +} + +define zeroext i16 @bcvt_f16_to_zext_i16(half %a, half %b) nounwind { +; RV64IZFHMIN-LABEL: bcvt_f16_to_zext_i16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, fa0 +; RV64IZFHMIN-NEXT: fadd.s ft0, ft1, ft0 +; RV64IZFHMIN-NEXT: fcvt.h.s ft0, ft0 +; RV64IZFHMIN-NEXT: fmv.x.h a0, ft0 +; RV64IZFHMIN-NEXT: slli a0, a0, 48 +; RV64IZFHMIN-NEXT: srli a0, a0, 48 +; RV64IZFHMIN-NEXT: ret + %1 = fadd half %a, %b + %2 = bitcast half %1 to i16 + ret i16 %2 +} + +define half @bcvt_i64_to_f16_via_i16(i64 %a, i64 %b) nounwind { +; RV64IZFHMIN-LABEL: bcvt_i64_to_f16_via_i16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fmv.h.x ft0, a0 +; RV64IZFHMIN-NEXT: fmv.h.x ft1, a1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft1, ft1 +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, ft0 +; RV64IZFHMIN-NEXT: fadd.s ft0, ft0, ft1 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = trunc i64 %a to i16 + %2 = trunc i64 %b to i16 + %3 = bitcast i16 %1 to half + %4 = bitcast i16 %2 to half + %5 = fadd 
half %3, %4 + ret half %5 +} + +define half @uitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFHMIN-LABEL: uitofp_aext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: slli a0, a0, 32 +; RV64IZFHMIN-NEXT: srli a0, a0, 32 +; RV64IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = uitofp i32 %a to half + ret half %1 +} + +define half @uitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFHMIN-LABEL: uitofp_sext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: slli a0, a0, 32 +; RV64IZFHMIN-NEXT: srli a0, a0, 32 +; RV64IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = uitofp i32 %a to half + ret half %1 +} + +define half @uitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFHMIN-LABEL: uitofp_zext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.lu ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = uitofp i32 %a to half + ret half %1 +} + +define half @sitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFHMIN-LABEL: sitofp_aext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: sext.w a0, a0 +; RV64IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = sitofp i32 %a to half + ret half %1 +} + +define half @sitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFHMIN-LABEL: sitofp_sext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = sitofp i32 %a to half + ret half %1 +} + +define half @sitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFHMIN-LABEL: sitofp_zext_i32_to_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: sext.w a0, a0 +; RV64IZFHMIN-NEXT: fcvt.s.l ft0, a0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = sitofp i32 %a to half + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-intrinsics.ll b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-intrinsics.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv64 -mattr=+zfhmin \ +; RUN: -verify-machineinstrs -target-abi lp64f | \ +; RUN: FileCheck -check-prefix=CHECKIZFHMIN %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+zfhmin -verify-machineinstrs -target-abi lp64d | \ +; RUN: FileCheck -check-prefix=CHECKIZFHMIN %s + +; These intrinsics require half and i64 to be legal types. 
+ +declare i64 @llvm.llrint.i64.f16(half) + +define i64 @llrint_f16(half %a) nounwind { +; CHECKIZFHMIN-LABEL: llrint_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.l.s a0, ft0 +; CHECKIZFHMIN-NEXT: ret + %1 = call i64 @llvm.llrint.i64.f16(half %a) + ret i64 %1 +} + +declare i64 @llvm.llround.i64.f16(half) + +define i64 @llround_f16(half %a) nounwind { +; CHECKIZFHMIN-LABEL: llround_f16: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; CHECKIZFHMIN-NEXT: fcvt.l.s a0, ft0, rmm +; CHECKIZFHMIN-NEXT: ret + %1 = call i64 @llvm.llround.i64.f16(half %a) + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics-strict.ll @@ -0,0 +1,309 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zfhmin \ +; RUN: -verify-machineinstrs -target-abi ilp32f -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV32IZFHMIN %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zfhmin \ +; RUN: -verify-machineinstrs -target-abi lp64f -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV64IZFHMIN %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -mattr=+zfhmin -verify-machineinstrs -target-abi ilp32d \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IZFHMIN %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+zfhmin -verify-machineinstrs -target-abi lp64d \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IZFHMIN %s + +declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata) + +define half @sqrt_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: sqrt_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fsqrt.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: sqrt_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fsqrt.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.sqrt.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare half @llvm.experimental.constrained.floor.f16(half, metadata) + +define half @floor_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: floor_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call floorf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: floor_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call floorf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.floor.f16(half %a, metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare half @llvm.experimental.constrained.ceil.f16(half, 
metadata) + +define half @ceil_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: ceil_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call ceilf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: ceil_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call ceilf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.ceil.f16(half %a, metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare half @llvm.experimental.constrained.trunc.f16(half, metadata) + +define half @trunc_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: trunc_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call truncf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: trunc_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call truncf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.trunc.f16(half %a, metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata) + +define half @rint_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: rint_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call rintf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: rint_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call rintf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.rint.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata) + +define half @nearbyint_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: nearbyint_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call nearbyintf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, 
sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: nearbyint_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call nearbyintf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.nearbyint.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare half @llvm.experimental.constrained.round.f16(half, metadata) + +define half @round_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: round_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call roundf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: round_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call roundf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.round.f16(half %a, metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare half @llvm.experimental.constrained.roundeven.f16(half, metadata) + +define half @roundeven_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: roundeven_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call roundevenf@plt +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: roundeven_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call roundevenf@plt +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret + %1 = call half @llvm.experimental.constrained.roundeven.f16(half %a, metadata !"fpexcept.strict") strictfp + ret half %1 +} + +declare iXLen @llvm.experimental.constrained.lrint.iXLen.f16(half, metadata, metadata) + +define iXLen @lrint_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: lrint_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: lrint_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lrint.iXLen.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} + +declare iXLen @llvm.experimental.constrained.lround.iXLen.f16(half, metadata) + +define iXLen @lround_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: lround_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: 
fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: lround_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lround.iXLen.f16(half %a, metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} + +declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata) + +define i64 @llrint_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: llrint_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call llrintf@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: llrint_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0 +; RV64IZFHMIN-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret i64 %1 +} + +declare i64 @llvm.experimental.constrained.llround.i64.f16(half, metadata) + +define i64 @llround_f16(half %a) nounwind strictfp { +; RV32IZFHMIN-LABEL: llround_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call llroundf@plt +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: llround_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llround.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics.ll b/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zfhmin \ +; RUN: -verify-machineinstrs -target-abi ilp32f | \ +; RUN: FileCheck -check-prefix=RV32IZFHMIN %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zfhmin \ +; RUN: -verify-machineinstrs -target-abi lp64f | \ +; RUN: FileCheck -check-prefix=RV64IZFHMIN %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -mattr=+zfhmin -verify-machineinstrs -target-abi ilp32d | \ +; RUN: FileCheck -check-prefix=RV32IDZFHMIN %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+zfhmin -verify-machineinstrs -target-abi lp64d | \ +; RUN: FileCheck -check-prefix=RV64IDZFHMIN %s + +; These intrinsics require half to be a legal type. 
+ +declare iXLen @llvm.lrint.iXLen.f16(half) + +define iXLen @lrint_f16(half %a) nounwind { +; RV32IZFHMIN-LABEL: lrint_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: lrint_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0 +; RV64IZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: lrint_f16: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFHMIN-NEXT: fcvt.w.s a0, ft0 +; RV32IDZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: lrint_f16: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFHMIN-NEXT: fcvt.l.s a0, ft0 +; RV64IDZFHMIN-NEXT: ret + %1 = call iXLen @llvm.lrint.iXLen.f16(half %a) + ret iXLen %1 +} + +declare iXLen @llvm.lround.iXLen.f16(half) + +define iXLen @lround_f16(half %a) nounwind { +; RV32IZFHMIN-LABEL: lround_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: lround_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, ft0, rmm +; RV64IZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: lround_f16: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFHMIN-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IDZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: lround_f16: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFHMIN-NEXT: fcvt.l.s a0, ft0, rmm +; RV64IDZFHMIN-NEXT: ret + %1 = call iXLen @llvm.lround.iXLen.f16(half %a) + ret iXLen %1 +} diff --git a/llvm/test/CodeGen/RISCV/zfhmin-imm.ll b/llvm/test/CodeGen/RISCV/zfhmin-imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zfhmin-imm.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32f -mattr=+zfhmin < %s \ +; RUN: | FileCheck --check-prefix=RV32IZFHMIN %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+zfhmin,+d < %s \ +; RUN: | FileCheck --check-prefix=RV32IDZFHMIN %s +; RUN: llc -mtriple=riscv64 -target-abi lp64f -mattr=+zfhmin < %s \ +; RUN: | FileCheck --check-prefix=RV64IZFHMIN %s +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+zfhmin,+d < %s \ +; RUN: | FileCheck --check-prefix=RV64IDZFHMIN %s + +define half @f16_positive_zero(half *%pf) nounwind { +; RV32IZFHMIN-LABEL: f16_positive_zero: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fmv.h.x fa0, zero +; RV32IZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: f16_positive_zero: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fmv.h.x fa0, zero +; RV32IDZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: f16_positive_zero: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fmv.h.x fa0, zero +; RV64IZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: f16_positive_zero: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fmv.h.x fa0, zero +; RV64IDZFHMIN-NEXT: ret + ret half 0.0 +} + +define half @f16_negative_zero(half *%pf) nounwind { +; RV32IZFHMIN-LABEL: f16_negative_zero: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fmv.w.x ft0, zero +; RV32IZFHMIN-NEXT: fneg.s ft0, ft0 +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IZFHMIN-NEXT: ret +; +; RV32IDZFHMIN-LABEL: f16_negative_zero: +; RV32IDZFHMIN: # %bb.0: +; RV32IDZFHMIN-NEXT: fmv.w.x ft0, zero +; RV32IDZFHMIN-NEXT: fneg.s ft0, ft0 +; RV32IDZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV32IDZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: f16_negative_zero: +; 
RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fmv.w.x ft0, zero +; RV64IZFHMIN-NEXT: fneg.s ft0, ft0 +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IZFHMIN-NEXT: ret +; +; RV64IDZFHMIN-LABEL: f16_negative_zero: +; RV64IDZFHMIN: # %bb.0: +; RV64IDZFHMIN-NEXT: fmv.w.x ft0, zero +; RV64IDZFHMIN-NEXT: fneg.s ft0, ft0 +; RV64IDZFHMIN-NEXT: fcvt.h.s fa0, ft0 +; RV64IDZFHMIN-NEXT: ret + ret half -0.0 +}
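
As a reading aid for the generated checks above, here is a minimal, hypothetical example (not part of this patch; the ZFHMIN prefix, function name, and exact register allocation are illustrative) of the lowering pattern the new CHECKIZFHMIN/RV32IZFHMIN/RV64IZFHMIN lines exercise: with Zfhmin alone there are no half-precision arithmetic instructions, so f16 operations are promoted to f32 and bracketed by fcvt.s.h/fcvt.h.s, whereas full Zfh would keep the computation in half precision (e.g. a single fadd.h).

; Hypothetical invocation: llc -mtriple=riscv64 -mattr=+zfhmin -target-abi lp64f
define half @f16_add_example(half %a, half %b) nounwind {
; Zfhmin has no fadd.h, so both operands are widened to f32, the add is
; done in single precision, and the result is narrowed back to f16:
; ZFHMIN:      fcvt.s.h ft0, fa1
; ZFHMIN-NEXT: fcvt.s.h ft1, fa0
; ZFHMIN-NEXT: fadd.s ft0, ft1, ft0
; ZFHMIN-NEXT: fcvt.h.s fa0, ft0
  %1 = fadd half %a, %b
  ret half %1
}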