diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -84,8 +84,14 @@
   FMV_X_ANYEXTH,
   FMV_W_X_RV64,
   FMV_X_ANYEXTW_RV64,
+  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
+  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
+  // range inputs. These are used for FP_TO_S/UINT_SAT lowering.
+  FCVT_X_RTZ,
+  FCVT_XU_RTZ,
   // FP to 32 bit int conversions for RV64. These are used to keep track of the
-  // result being sign extended to 64 bit.
+  // result being sign extended to 64 bit. These saturate out of range inputs.
+  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering.
   FCVT_W_RTZ_RV64,
   FCVT_WU_RTZ_RV64,
   // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -376,6 +376,9 @@
   }
 
   if (Subtarget.hasStdExtF()) {
+    setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
+    setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
+
     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   }
@@ -1379,6 +1382,39 @@
   return false;
 }
 
+/// Custom lowering for ISD::FP_TO_SINT_SAT and ISD::FP_TO_UINT_SAT.
+///
+/// Operand 0 is the floating-point source; operand 1 is a VTSDNode carrying
+/// the saturation type. The result is a single RISCVISD FCVT node wrapped in
+/// a select that yields 0 when the source is NaN. An empty SDValue is returned
+/// when no FCVT node matches the (result type, saturation type) pair.
+static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
+  // RISCV FP-to-int conversions saturate to the destination register size, but
+  // don't produce 0 for nan. We can use a conversion instruction and fix the
+  // nan case with a compare and a select.
+  SDValue Src = Op.getOperand(0);
+
+  EVT DstVT = Op.getValueType();
+  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
+  unsigned Opc;
+  if (SatVT == DstVT)
+    Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
+  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+    Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
+  else
+    return SDValue();
+  // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+  SDLoc DL(Op);
+  SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src);
+
+  // Src is unordered with itself only when it is NaN; select 0 in that case.
+  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -2489,6 +2525,9 @@
     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
     return convertFromScalableVector(VT, Src, DAG, Subtarget);
   }
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT:
+    return lowerFP_TO_INT_SAT(Op, DAG);
   case ISD::VECREDUCE_ADD:
   case ISD::VECREDUCE_UMAX:
   case ISD::VECREDUCE_SMAX:
@@ -8357,6 +8396,8 @@
   NODE_NAME_CASE(FMV_X_ANYEXTH)
   NODE_NAME_CASE(FMV_W_X_RV64)
   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
+  NODE_NAME_CASE(FCVT_X_RTZ)
+  NODE_NAME_CASE(FCVT_XU_RTZ)
   NODE_NAME_CASE(FCVT_W_RTZ_RV64)
   NODE_NAME_CASE(FCVT_WU_RTZ_RV64)
   NODE_NAME_CASE(READ_CYCLE_WIDE)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -331,6 +331,10 @@
 def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>;
 def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>;
 
+// Saturating double->[u]int.
+def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_WU_D $rs1, 0b001)>;
+
 // float->int32 with current rounding mode.
def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>; @@ -361,6 +365,10 @@ def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; +// Saturating double->[u]int. +def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>; +def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>; + // double->[u]int64. Round-to-zero must be used. def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -21,6 +21,8 @@ : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; def STD_RISCVFCVT_W_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>; +def STD_RISCVFCVT_X + : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>; def riscv_fmv_w_x_rv64 : SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>; @@ -30,6 +32,10 @@ : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>; def riscv_fcvt_wu_rtz_rv64 : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>; +def riscv_fcvt_x_rtz + : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>; +def riscv_fcvt_xu_rtz + : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -379,6 +385,10 @@ def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; +// Saturating float->[u]int. +def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(i32 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; + // float->int32 with current rounding mode. 
def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>; @@ -407,6 +417,10 @@ def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; +// Saturating float->[u]int. +def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; +def : Pat<(i64 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; + // float->int64 with current rounding mode. def : Pat<(i64 (lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>; def : Pat<(i64 (llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -338,6 +338,10 @@ def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; +// Saturating half->[u]int. +def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(i32 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; + // half->int32 with current rounding mode. def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>; @@ -360,6 +364,10 @@ def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; +// Saturating half->[u]int. +def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; +def : Pat<(i64 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; + // half->int64 with current rounding mode. 
def : Pat<(i64 (lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>; def : Pat<(i64 (llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>; diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -84,12 +84,6 @@ ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; RV32IFD-NEXT: .LBB3_2: -; RV32IFD-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IFD-NEXT: fld ft1, %lo(.LCPI3_0)(a0) -; RV32IFD-NEXT: lui a0, %hi(.LCPI3_1) -; RV32IFD-NEXT: fld ft2, %lo(.LCPI3_1)(a0) -; RV32IFD-NEXT: fmax.d ft0, ft0, ft1 -; RV32IFD-NEXT: fmin.d ft0, ft0, ft2 ; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret @@ -103,13 +97,7 @@ ; RV64IFD-NEXT: mv a0, zero ; RV64IFD-NEXT: ret ; RV64IFD-NEXT: .LBB3_2: -; RV64IFD-NEXT: lui a0, %hi(.LCPI3_0) -; RV64IFD-NEXT: fld ft1, %lo(.LCPI3_0)(a0) -; RV64IFD-NEXT: lui a0, %hi(.LCPI3_1) -; RV64IFD-NEXT: fld ft2, %lo(.LCPI3_1)(a0) -; RV64IFD-NEXT: fmax.d ft0, ft0, ft1 -; RV64IFD-NEXT: fmin.d ft0, ft0, ft2 -; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz ; RV64IFD-NEXT: ret start: %0 = tail call i32 @llvm.fptosi.sat.i32.f64(double %a) @@ -182,24 +170,27 @@ ; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft0, 8(sp) -; RV32IFD-NEXT: lui a0, %hi(.LCPI6_0) -; RV32IFD-NEXT: fld ft1, %lo(.LCPI6_0)(a0) -; RV32IFD-NEXT: fcvt.d.w ft2, zero -; RV32IFD-NEXT: fmax.d ft0, ft0, ft2 -; RV32IFD-NEXT: fmin.d ft0, ft0, ft1 +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB6_2 +; RV32IFD-NEXT: # %bb.1: # %start +; RV32IFD-NEXT: mv a0, zero +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB6_2: ; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_wu_d_sat: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a1, %hi(.LCPI6_0) -; RV64IFD-NEXT: fld ft0, %lo(.LCPI6_0)(a1) 
-; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: fmv.d.x ft2, zero -; RV64IFD-NEXT: fmax.d ft1, ft1, ft2 -; RV64IFD-NEXT: fmin.d ft0, ft1, ft0 -; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: feq.d a0, ft0, ft0 +; RV64IFD-NEXT: bnez a0, .LBB6_2 +; RV64IFD-NEXT: # %bb.1: # %start +; RV64IFD-NEXT: mv a0, zero +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB6_2: +; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz ; RV64IFD-NEXT: ret start: %0 = tail call i32 @llvm.fptoui.sat.i32.f64(double %a) @@ -370,33 +361,14 @@ ; ; RV64IFD-LABEL: fcvt_l_d_sat: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a1, %hi(.LCPI12_0) -; RV64IFD-NEXT: fld ft1, %lo(.LCPI12_0)(a1) ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fle.d a0, ft1, ft0 -; RV64IFD-NEXT: addi a1, zero, -1 +; RV64IFD-NEXT: feq.d a0, ft0, ft0 ; RV64IFD-NEXT: bnez a0, .LBB12_2 ; RV64IFD-NEXT: # %bb.1: # %start -; RV64IFD-NEXT: slli a0, a1, 63 -; RV64IFD-NEXT: j .LBB12_3 +; RV64IFD-NEXT: mv a0, zero +; RV64IFD-NEXT: ret ; RV64IFD-NEXT: .LBB12_2: ; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz -; RV64IFD-NEXT: .LBB12_3: # %start -; RV64IFD-NEXT: lui a2, %hi(.LCPI12_1) -; RV64IFD-NEXT: fld ft1, %lo(.LCPI12_1)(a2) -; RV64IFD-NEXT: flt.d a2, ft1, ft0 -; RV64IFD-NEXT: bnez a2, .LBB12_6 -; RV64IFD-NEXT: # %bb.4: # %start -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: beqz a1, .LBB12_7 -; RV64IFD-NEXT: .LBB12_5: # %start -; RV64IFD-NEXT: ret -; RV64IFD-NEXT: .LBB12_6: -; RV64IFD-NEXT: srli a0, a1, 1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: bnez a1, .LBB12_5 -; RV64IFD-NEXT: .LBB12_7: # %start -; RV64IFD-NEXT: mv a0, zero ; RV64IFD-NEXT: ret start: %0 = tail call i64 @llvm.fptosi.sat.i64.f64(double %a) @@ -469,23 +441,13 @@ ; RV64IFD-LABEL: fcvt_lu_d_sat: ; RV64IFD: # %bb.0: # %start ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fmv.d.x ft1, zero -; RV64IFD-NEXT: fle.d a0, ft1, ft0 +; RV64IFD-NEXT: feq.d a0, ft0, ft0 ; RV64IFD-NEXT: bnez a0, .LBB14_2 ; RV64IFD-NEXT: # %bb.1: # %start -; 
RV64IFD-NEXT: mv a1, zero -; RV64IFD-NEXT: j .LBB14_3 +; RV64IFD-NEXT: mv a0, zero +; RV64IFD-NEXT: ret ; RV64IFD-NEXT: .LBB14_2: -; RV64IFD-NEXT: fcvt.lu.d a1, ft0, rtz -; RV64IFD-NEXT: .LBB14_3: # %start -; RV64IFD-NEXT: lui a0, %hi(.LCPI14_0) -; RV64IFD-NEXT: fld ft1, %lo(.LCPI14_0)(a0) -; RV64IFD-NEXT: flt.d a2, ft1, ft0 -; RV64IFD-NEXT: addi a0, zero, -1 -; RV64IFD-NEXT: bnez a2, .LBB14_5 -; RV64IFD-NEXT: # %bb.4: # %start -; RV64IFD-NEXT: mv a0, a1 -; RV64IFD-NEXT: .LBB14_5: # %start +; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz ; RV64IFD-NEXT: ret start: %0 = tail call i64 @llvm.fptoui.sat.i64.f64(double %a) diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -25,65 +25,27 @@ define i32 @fcvt_w_s_sat(float %a) nounwind { ; RV32IF-LABEL: fcvt_w_s_sat: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IF-NEXT: flw ft1, %lo(.LCPI1_0)(a1) ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fle.s a0, ft1, ft0 -; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: feq.s a0, ft0, ft0 ; RV32IF-NEXT: bnez a0, .LBB1_2 ; RV32IF-NEXT: # %bb.1: # %start -; RV32IF-NEXT: lui a0, 524288 -; RV32IF-NEXT: j .LBB1_3 +; RV32IF-NEXT: mv a0, zero +; RV32IF-NEXT: ret ; RV32IF-NEXT: .LBB1_2: ; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IF-NEXT: .LBB1_3: # %start -; RV32IF-NEXT: lui a2, %hi(.LCPI1_1) -; RV32IF-NEXT: flw ft1, %lo(.LCPI1_1)(a2) -; RV32IF-NEXT: flt.s a2, ft1, ft0 -; RV32IF-NEXT: bnez a2, .LBB1_6 -; RV32IF-NEXT: # %bb.4: # %start -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: beqz a1, .LBB1_7 -; RV32IF-NEXT: .LBB1_5: # %start -; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB1_6: -; RV32IF-NEXT: addi a0, a1, -1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: bnez a1, .LBB1_5 -; RV32IF-NEXT: .LBB1_7: # %start -; RV32IF-NEXT: mv a0, zero ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_w_s_sat: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: 
lui a1, %hi(.LCPI1_0) -; RV64IF-NEXT: flw ft1, %lo(.LCPI1_0)(a1) ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fle.s a0, ft1, ft0 -; RV64IF-NEXT: lui a1, 524288 +; RV64IF-NEXT: feq.s a0, ft0, ft0 ; RV64IF-NEXT: bnez a0, .LBB1_2 ; RV64IF-NEXT: # %bb.1: # %start -; RV64IF-NEXT: lui a0, 524288 -; RV64IF-NEXT: j .LBB1_3 -; RV64IF-NEXT: .LBB1_2: -; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IF-NEXT: .LBB1_3: # %start -; RV64IF-NEXT: lui a2, %hi(.LCPI1_1) -; RV64IF-NEXT: flw ft1, %lo(.LCPI1_1)(a2) -; RV64IF-NEXT: flt.s a2, ft1, ft0 -; RV64IF-NEXT: bnez a2, .LBB1_6 -; RV64IF-NEXT: # %bb.4: # %start -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: beqz a1, .LBB1_7 -; RV64IF-NEXT: .LBB1_5: # %start -; RV64IF-NEXT: ret -; RV64IF-NEXT: .LBB1_6: -; RV64IF-NEXT: addiw a0, a1, -1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: bnez a1, .LBB1_5 -; RV64IF-NEXT: .LBB1_7: # %start ; RV64IF-NEXT: mv a0, zero ; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB1_2: +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret start: %0 = tail call i32 @llvm.fptosi.sat.i32.f32(float %a) ret i32 %0 @@ -143,45 +105,25 @@ ; RV32IF-LABEL: fcvt_wu_s_sat: ; RV32IF: # %bb.0: # %start ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fmv.w.x ft1, zero -; RV32IF-NEXT: fle.s a0, ft1, ft0 +; RV32IF-NEXT: feq.s a0, ft0, ft0 ; RV32IF-NEXT: bnez a0, .LBB4_2 ; RV32IF-NEXT: # %bb.1: # %start -; RV32IF-NEXT: mv a1, zero -; RV32IF-NEXT: j .LBB4_3 +; RV32IF-NEXT: mv a0, zero +; RV32IF-NEXT: ret ; RV32IF-NEXT: .LBB4_2: -; RV32IF-NEXT: fcvt.wu.s a1, ft0, rtz -; RV32IF-NEXT: .LBB4_3: # %start -; RV32IF-NEXT: lui a0, %hi(.LCPI4_0) -; RV32IF-NEXT: flw ft1, %lo(.LCPI4_0)(a0) -; RV32IF-NEXT: flt.s a2, ft1, ft0 -; RV32IF-NEXT: addi a0, zero, -1 -; RV32IF-NEXT: bnez a2, .LBB4_5 -; RV32IF-NEXT: # %bb.4: # %start -; RV32IF-NEXT: mv a0, a1 -; RV32IF-NEXT: .LBB4_5: # %start +; RV32IF-NEXT: fcvt.wu.s a0, ft0, rtz ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_wu_s_sat: ; RV64IF: # %bb.0: # %start ; RV64IF-NEXT: fmv.w.x ft0, a0 -; 
RV64IF-NEXT: fmv.w.x ft1, zero -; RV64IF-NEXT: fle.s a0, ft1, ft0 +; RV64IF-NEXT: feq.s a0, ft0, ft0 ; RV64IF-NEXT: bnez a0, .LBB4_2 ; RV64IF-NEXT: # %bb.1: # %start ; RV64IF-NEXT: mv a0, zero -; RV64IF-NEXT: j .LBB4_3 +; RV64IF-NEXT: ret ; RV64IF-NEXT: .LBB4_2: -; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64IF-NEXT: .LBB4_3: # %start -; RV64IF-NEXT: lui a1, %hi(.LCPI4_0) -; RV64IF-NEXT: flw ft1, %lo(.LCPI4_0)(a1) -; RV64IF-NEXT: flt.s a1, ft1, ft0 -; RV64IF-NEXT: beqz a1, .LBB4_5 -; RV64IF-NEXT: # %bb.4: -; RV64IF-NEXT: addi a0, zero, -1 -; RV64IF-NEXT: srli a0, a0, 32 -; RV64IF-NEXT: .LBB4_5: # %start +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz ; RV64IF-NEXT: ret start: %0 = tail call i32 @llvm.fptoui.sat.i32.f32(float %a) @@ -382,33 +324,14 @@ ; ; RV64IF-LABEL: fcvt_l_s_sat: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: lui a1, %hi(.LCPI12_0) -; RV64IF-NEXT: flw ft1, %lo(.LCPI12_0)(a1) ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fle.s a0, ft1, ft0 -; RV64IF-NEXT: addi a1, zero, -1 +; RV64IF-NEXT: feq.s a0, ft0, ft0 ; RV64IF-NEXT: bnez a0, .LBB12_2 ; RV64IF-NEXT: # %bb.1: # %start -; RV64IF-NEXT: slli a0, a1, 63 -; RV64IF-NEXT: j .LBB12_3 +; RV64IF-NEXT: mv a0, zero +; RV64IF-NEXT: ret ; RV64IF-NEXT: .LBB12_2: ; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IF-NEXT: .LBB12_3: # %start -; RV64IF-NEXT: lui a2, %hi(.LCPI12_1) -; RV64IF-NEXT: flw ft1, %lo(.LCPI12_1)(a2) -; RV64IF-NEXT: flt.s a2, ft1, ft0 -; RV64IF-NEXT: bnez a2, .LBB12_6 -; RV64IF-NEXT: # %bb.4: # %start -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: beqz a1, .LBB12_7 -; RV64IF-NEXT: .LBB12_5: # %start -; RV64IF-NEXT: ret -; RV64IF-NEXT: .LBB12_6: -; RV64IF-NEXT: srli a0, a1, 1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: bnez a1, .LBB12_5 -; RV64IF-NEXT: .LBB12_7: # %start -; RV64IF-NEXT: mv a0, zero ; RV64IF-NEXT: ret start: %0 = tail call i64 @llvm.fptosi.sat.i64.f32(float %a) @@ -481,23 +404,13 @@ ; RV64IF-LABEL: fcvt_lu_s_sat: ; RV64IF: # %bb.0: # %start ; RV64IF-NEXT: fmv.w.x ft0, a0 -; 
RV64IF-NEXT: fmv.w.x ft1, zero -; RV64IF-NEXT: fle.s a0, ft1, ft0 +; RV64IF-NEXT: feq.s a0, ft0, ft0 ; RV64IF-NEXT: bnez a0, .LBB14_2 ; RV64IF-NEXT: # %bb.1: # %start -; RV64IF-NEXT: mv a1, zero -; RV64IF-NEXT: j .LBB14_3 +; RV64IF-NEXT: mv a0, zero +; RV64IF-NEXT: ret ; RV64IF-NEXT: .LBB14_2: -; RV64IF-NEXT: fcvt.lu.s a1, ft0, rtz -; RV64IF-NEXT: .LBB14_3: # %start -; RV64IF-NEXT: lui a0, %hi(.LCPI14_0) -; RV64IF-NEXT: flw ft1, %lo(.LCPI14_0)(a0) -; RV64IF-NEXT: flt.s a2, ft1, ft0 -; RV64IF-NEXT: addi a0, zero, -1 -; RV64IF-NEXT: bnez a2, .LBB14_5 -; RV64IF-NEXT: # %bb.4: # %start -; RV64IF-NEXT: mv a0, a1 -; RV64IF-NEXT: .LBB14_5: # %start +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz ; RV64IF-NEXT: ret start: %0 = tail call i64 @llvm.fptoui.sat.i64.f32(float %a) diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -260,127 +260,47 @@ define i32 @fcvt_w_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_w_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: lui a0, %hi(.LCPI6_0) -; RV32IZFH-NEXT: flw ft1, %lo(.LCPI6_0)(a0) -; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IZFH-NEXT: fle.s a0, ft1, ft0 -; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 ; RV32IZFH-NEXT: bnez a0, .LBB6_2 ; RV32IZFH-NEXT: # %bb.1: # %start -; RV32IZFH-NEXT: lui a0, 524288 -; RV32IZFH-NEXT: j .LBB6_3 -; RV32IZFH-NEXT: .LBB6_2: -; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IZFH-NEXT: .LBB6_3: # %start -; RV32IZFH-NEXT: lui a2, %hi(.LCPI6_1) -; RV32IZFH-NEXT: flw ft1, %lo(.LCPI6_1)(a2) -; RV32IZFH-NEXT: flt.s a2, ft1, ft0 -; RV32IZFH-NEXT: bnez a2, .LBB6_6 -; RV32IZFH-NEXT: # %bb.4: # %start -; RV32IZFH-NEXT: feq.s a1, ft0, ft0 -; RV32IZFH-NEXT: beqz a1, .LBB6_7 -; RV32IZFH-NEXT: .LBB6_5: # %start -; RV32IZFH-NEXT: ret -; RV32IZFH-NEXT: .LBB6_6: -; RV32IZFH-NEXT: addi a0, a1, -1 -; RV32IZFH-NEXT: feq.s a1, ft0, ft0 -; RV32IZFH-NEXT: bnez a1, 
.LBB6_5 -; RV32IZFH-NEXT: .LBB6_7: # %start ; RV32IZFH-NEXT: mv a0, zero ; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB6_2: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_w_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI6_0) -; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI6_0)(a0) -; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IDZFH-NEXT: fle.s a0, ft1, ft0 -; RV32IDZFH-NEXT: lui a1, 524288 +; RV32IDZFH-NEXT: feq.h a0, fa0, fa0 ; RV32IDZFH-NEXT: bnez a0, .LBB6_2 ; RV32IDZFH-NEXT: # %bb.1: # %start -; RV32IDZFH-NEXT: lui a0, 524288 -; RV32IDZFH-NEXT: j .LBB6_3 -; RV32IDZFH-NEXT: .LBB6_2: -; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz -; RV32IDZFH-NEXT: .LBB6_3: # %start -; RV32IDZFH-NEXT: lui a2, %hi(.LCPI6_1) -; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI6_1)(a2) -; RV32IDZFH-NEXT: flt.s a2, ft1, ft0 -; RV32IDZFH-NEXT: bnez a2, .LBB6_6 -; RV32IDZFH-NEXT: # %bb.4: # %start -; RV32IDZFH-NEXT: feq.s a1, ft0, ft0 -; RV32IDZFH-NEXT: beqz a1, .LBB6_7 -; RV32IDZFH-NEXT: .LBB6_5: # %start -; RV32IDZFH-NEXT: ret -; RV32IDZFH-NEXT: .LBB6_6: -; RV32IDZFH-NEXT: addi a0, a1, -1 -; RV32IDZFH-NEXT: feq.s a1, ft0, ft0 -; RV32IDZFH-NEXT: bnez a1, .LBB6_5 -; RV32IDZFH-NEXT: .LBB6_7: # %start ; RV32IDZFH-NEXT: mv a0, zero ; RV32IDZFH-NEXT: ret +; RV32IDZFH-NEXT: .LBB6_2: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_w_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: lui a0, %hi(.LCPI6_0) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI6_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IZFH-NEXT: fle.s a0, ft1, ft0 -; RV64IZFH-NEXT: lui a1, 524288 +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IZFH-NEXT: bnez a0, .LBB6_2 ; RV64IZFH-NEXT: # %bb.1: # %start -; RV64IZFH-NEXT: lui a0, 524288 -; RV64IZFH-NEXT: j .LBB6_3 -; RV64IZFH-NEXT: .LBB6_2: -; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IZFH-NEXT: .LBB6_3: # %start -; RV64IZFH-NEXT: lui a2, %hi(.LCPI6_1) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI6_1)(a2) -; 
RV64IZFH-NEXT: flt.s a2, ft1, ft0 -; RV64IZFH-NEXT: bnez a2, .LBB6_6 -; RV64IZFH-NEXT: # %bb.4: # %start -; RV64IZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IZFH-NEXT: beqz a1, .LBB6_7 -; RV64IZFH-NEXT: .LBB6_5: # %start -; RV64IZFH-NEXT: ret -; RV64IZFH-NEXT: .LBB6_6: -; RV64IZFH-NEXT: addiw a0, a1, -1 -; RV64IZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IZFH-NEXT: bnez a1, .LBB6_5 -; RV64IZFH-NEXT: .LBB6_7: # %start ; RV64IZFH-NEXT: mv a0, zero ; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB6_2: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_w_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI6_0) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI6_0)(a0) -; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 -; RV64IDZFH-NEXT: lui a1, 524288 +; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IDZFH-NEXT: bnez a0, .LBB6_2 ; RV64IDZFH-NEXT: # %bb.1: # %start -; RV64IDZFH-NEXT: lui a0, 524288 -; RV64IDZFH-NEXT: j .LBB6_3 -; RV64IDZFH-NEXT: .LBB6_2: -; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IDZFH-NEXT: .LBB6_3: # %start -; RV64IDZFH-NEXT: lui a2, %hi(.LCPI6_1) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI6_1)(a2) -; RV64IDZFH-NEXT: flt.s a2, ft1, ft0 -; RV64IDZFH-NEXT: bnez a2, .LBB6_6 -; RV64IDZFH-NEXT: # %bb.4: # %start -; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IDZFH-NEXT: beqz a1, .LBB6_7 -; RV64IDZFH-NEXT: .LBB6_5: # %start -; RV64IDZFH-NEXT: ret -; RV64IDZFH-NEXT: .LBB6_6: -; RV64IDZFH-NEXT: addiw a0, a1, -1 -; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IDZFH-NEXT: bnez a1, .LBB6_5 -; RV64IDZFH-NEXT: .LBB6_7: # %start ; RV64IDZFH-NEXT: mv a0, zero ; RV64IDZFH-NEXT: ret +; RV64IDZFH-NEXT: .LBB6_2: +; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret start: %0 = tail call i32 @llvm.fptosi.sat.i32.f16(half %a) ret i32 %0 @@ -414,90 +334,46 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_wu_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IZFH-NEXT: fmv.w.x 
ft1, zero -; RV32IZFH-NEXT: fle.s a0, ft1, ft0 +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 ; RV32IZFH-NEXT: bnez a0, .LBB8_2 ; RV32IZFH-NEXT: # %bb.1: # %start -; RV32IZFH-NEXT: mv a1, zero -; RV32IZFH-NEXT: j .LBB8_3 +; RV32IZFH-NEXT: mv a0, zero +; RV32IZFH-NEXT: ret ; RV32IZFH-NEXT: .LBB8_2: -; RV32IZFH-NEXT: fcvt.wu.s a1, ft0, rtz -; RV32IZFH-NEXT: .LBB8_3: # %start -; RV32IZFH-NEXT: lui a0, %hi(.LCPI8_0) -; RV32IZFH-NEXT: flw ft1, %lo(.LCPI8_0)(a0) -; RV32IZFH-NEXT: flt.s a2, ft1, ft0 -; RV32IZFH-NEXT: addi a0, zero, -1 -; RV32IZFH-NEXT: bnez a2, .LBB8_5 -; RV32IZFH-NEXT: # %bb.4: # %start -; RV32IZFH-NEXT: mv a0, a1 -; RV32IZFH-NEXT: .LBB8_5: # %start +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV32IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_wu_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV32IDZFH-NEXT: fmv.w.x ft1, zero -; RV32IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV32IDZFH-NEXT: feq.h a0, fa0, fa0 ; RV32IDZFH-NEXT: bnez a0, .LBB8_2 ; RV32IDZFH-NEXT: # %bb.1: # %start -; RV32IDZFH-NEXT: mv a1, zero -; RV32IDZFH-NEXT: j .LBB8_3 +; RV32IDZFH-NEXT: mv a0, zero +; RV32IDZFH-NEXT: ret ; RV32IDZFH-NEXT: .LBB8_2: -; RV32IDZFH-NEXT: fcvt.wu.s a1, ft0, rtz -; RV32IDZFH-NEXT: .LBB8_3: # %start -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI8_0) -; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI8_0)(a0) -; RV32IDZFH-NEXT: flt.s a2, ft1, ft0 -; RV32IDZFH-NEXT: addi a0, zero, -1 -; RV32IDZFH-NEXT: bnez a2, .LBB8_5 -; RV32IDZFH-NEXT: # %bb.4: # %start -; RV32IDZFH-NEXT: mv a0, a1 -; RV32IDZFH-NEXT: .LBB8_5: # %start +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV32IDZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_wu_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IZFH-NEXT: fmv.w.x ft1, zero -; RV64IZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IZFH-NEXT: bnez a0, .LBB8_2 ; RV64IZFH-NEXT: # %bb.1: # %start ; RV64IZFH-NEXT: mv a0, zero -; RV64IZFH-NEXT: j .LBB8_3 +; RV64IZFH-NEXT: ret ; RV64IZFH-NEXT: .LBB8_2: -; RV64IZFH-NEXT: 
fcvt.lu.s a0, ft0, rtz -; RV64IZFH-NEXT: .LBB8_3: # %start -; RV64IZFH-NEXT: lui a1, %hi(.LCPI8_0) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI8_0)(a1) -; RV64IZFH-NEXT: flt.s a1, ft1, ft0 -; RV64IZFH-NEXT: beqz a1, .LBB8_5 -; RV64IZFH-NEXT: # %bb.4: -; RV64IZFH-NEXT: addi a0, zero, -1 -; RV64IZFH-NEXT: srli a0, a0, 32 -; RV64IZFH-NEXT: .LBB8_5: # %start +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_wu_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IDZFH-NEXT: fmv.w.x ft1, zero -; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IDZFH-NEXT: bnez a0, .LBB8_2 ; RV64IDZFH-NEXT: # %bb.1: # %start ; RV64IDZFH-NEXT: mv a0, zero -; RV64IDZFH-NEXT: j .LBB8_3 +; RV64IDZFH-NEXT: ret ; RV64IDZFH-NEXT: .LBB8_2: -; RV64IDZFH-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64IDZFH-NEXT: .LBB8_3: # %start -; RV64IDZFH-NEXT: lui a1, %hi(.LCPI8_0) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI8_0)(a1) -; RV64IDZFH-NEXT: flt.s a1, ft1, ft0 -; RV64IDZFH-NEXT: beqz a1, .LBB8_5 -; RV64IDZFH-NEXT: # %bb.4: -; RV64IDZFH-NEXT: addi a0, zero, -1 -; RV64IDZFH-NEXT: srli a0, a0, 32 -; RV64IDZFH-NEXT: .LBB8_5: # %start +; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret start: %0 = tail call i32 @llvm.fptoui.sat.i32.f16(half %a) @@ -652,65 +528,25 @@ ; ; RV64IZFH-LABEL: fcvt_l_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: lui a0, %hi(.LCPI10_0) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI10_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IZFH-NEXT: fle.s a0, ft1, ft0 -; RV64IZFH-NEXT: addi a1, zero, -1 +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IZFH-NEXT: bnez a0, .LBB10_2 ; RV64IZFH-NEXT: # %bb.1: # %start -; RV64IZFH-NEXT: slli a0, a1, 63 -; RV64IZFH-NEXT: j .LBB10_3 -; RV64IZFH-NEXT: .LBB10_2: -; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IZFH-NEXT: .LBB10_3: # %start -; RV64IZFH-NEXT: lui a2, %hi(.LCPI10_1) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI10_1)(a2) -; RV64IZFH-NEXT: flt.s a2, ft1, ft0 -; 
RV64IZFH-NEXT: bnez a2, .LBB10_6 -; RV64IZFH-NEXT: # %bb.4: # %start -; RV64IZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IZFH-NEXT: beqz a1, .LBB10_7 -; RV64IZFH-NEXT: .LBB10_5: # %start -; RV64IZFH-NEXT: ret -; RV64IZFH-NEXT: .LBB10_6: -; RV64IZFH-NEXT: srli a0, a1, 1 -; RV64IZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IZFH-NEXT: bnez a1, .LBB10_5 -; RV64IZFH-NEXT: .LBB10_7: # %start ; RV64IZFH-NEXT: mv a0, zero ; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB10_2: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_l_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI10_0) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI10_0)(a0) -; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 -; RV64IDZFH-NEXT: addi a1, zero, -1 +; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IDZFH-NEXT: bnez a0, .LBB10_2 ; RV64IDZFH-NEXT: # %bb.1: # %start -; RV64IDZFH-NEXT: slli a0, a1, 63 -; RV64IDZFH-NEXT: j .LBB10_3 -; RV64IDZFH-NEXT: .LBB10_2: -; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IDZFH-NEXT: .LBB10_3: # %start -; RV64IDZFH-NEXT: lui a2, %hi(.LCPI10_1) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI10_1)(a2) -; RV64IDZFH-NEXT: flt.s a2, ft1, ft0 -; RV64IDZFH-NEXT: bnez a2, .LBB10_6 -; RV64IDZFH-NEXT: # %bb.4: # %start -; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IDZFH-NEXT: beqz a1, .LBB10_7 -; RV64IDZFH-NEXT: .LBB10_5: # %start -; RV64IDZFH-NEXT: ret -; RV64IDZFH-NEXT: .LBB10_6: -; RV64IDZFH-NEXT: srli a0, a1, 1 -; RV64IDZFH-NEXT: feq.s a1, ft0, ft0 -; RV64IDZFH-NEXT: bnez a1, .LBB10_5 -; RV64IDZFH-NEXT: .LBB10_7: # %start ; RV64IDZFH-NEXT: mv a0, zero ; RV64IDZFH-NEXT: ret +; RV64IDZFH-NEXT: .LBB10_2: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret start: %0 = tail call i64 @llvm.fptosi.sat.i64.f16(half %a) ret i64 %0 @@ -838,46 +674,24 @@ ; ; RV64IZFH-LABEL: fcvt_lu_h_sat: ; RV64IZFH: # %bb.0: # %start -; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IZFH-NEXT: fmv.w.x ft1, zero -; RV64IZFH-NEXT: fle.s a0, ft1, ft0 +; 
RV64IZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IZFH-NEXT: bnez a0, .LBB12_2 ; RV64IZFH-NEXT: # %bb.1: # %start -; RV64IZFH-NEXT: mv a1, zero -; RV64IZFH-NEXT: j .LBB12_3 +; RV64IZFH-NEXT: mv a0, zero +; RV64IZFH-NEXT: ret ; RV64IZFH-NEXT: .LBB12_2: -; RV64IZFH-NEXT: fcvt.lu.s a1, ft0, rtz -; RV64IZFH-NEXT: .LBB12_3: # %start -; RV64IZFH-NEXT: lui a0, %hi(.LCPI12_0) -; RV64IZFH-NEXT: flw ft1, %lo(.LCPI12_0)(a0) -; RV64IZFH-NEXT: flt.s a2, ft1, ft0 -; RV64IZFH-NEXT: addi a0, zero, -1 -; RV64IZFH-NEXT: bnez a2, .LBB12_5 -; RV64IZFH-NEXT: # %bb.4: # %start -; RV64IZFH-NEXT: mv a0, a1 -; RV64IZFH-NEXT: .LBB12_5: # %start +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz ; RV64IZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_lu_h_sat: ; RV64IDZFH: # %bb.0: # %start -; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 -; RV64IDZFH-NEXT: fmv.w.x ft1, zero -; RV64IDZFH-NEXT: fle.s a0, ft1, ft0 +; RV64IDZFH-NEXT: feq.h a0, fa0, fa0 ; RV64IDZFH-NEXT: bnez a0, .LBB12_2 ; RV64IDZFH-NEXT: # %bb.1: # %start -; RV64IDZFH-NEXT: mv a1, zero -; RV64IDZFH-NEXT: j .LBB12_3 +; RV64IDZFH-NEXT: mv a0, zero +; RV64IDZFH-NEXT: ret ; RV64IDZFH-NEXT: .LBB12_2: -; RV64IDZFH-NEXT: fcvt.lu.s a1, ft0, rtz -; RV64IDZFH-NEXT: .LBB12_3: # %start -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI12_0) -; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI12_0)(a0) -; RV64IDZFH-NEXT: flt.s a2, ft1, ft0 -; RV64IDZFH-NEXT: addi a0, zero, -1 -; RV64IDZFH-NEXT: bnez a2, .LBB12_5 -; RV64IDZFH-NEXT: # %bb.4: # %start -; RV64IDZFH-NEXT: mv a0, a1 -; RV64IDZFH-NEXT: .LBB12_5: # %start +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret start: %0 = tail call i64 @llvm.fptoui.sat.i64.f16(half %a)