Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -352,6 +352,7 @@ setOperationAction(FPOpToExpand, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setOperationAction({ISD::FFLOOR, ISD::FTRUNC, ISD::FCEIL}, MVT::f32, Custom); } if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) @@ -370,6 +371,8 @@ setOperationAction(FPOpToExpand, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); + if (Subtarget.is64Bit()) + setOperationAction({ISD::FFLOOR, ISD::FTRUNC, ISD::FCEIL}, MVT::f64, Custom); } if (Subtarget.is64Bit()) @@ -1817,12 +1820,13 @@ // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain // and back. Taking care to avoid converting values that are nan or already -// correct. +// correct. Note that this handles both scalar and vector cases. // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't // have FRM dependencies modeled yet. -static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) { +static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); - assert(VT.isVector() && "Unexpected type"); + MVT XLenVT = Subtarget.getXLenVT(); SDLoc DL(Op); @@ -1830,17 +1834,21 @@ SDValue Src = DAG.getFreeze(Op.getOperand(0)); // Truncate to integer and convert back to FP. - MVT IntVT = VT.changeVectorElementTypeToInteger(); + assert((VT.isVector() || + TypeSize::isKnownLE(VT.getSizeInBits(), XLenVT.getSizeInBits())) && + "can not round trip through integer"); + MVT IntVT = VT.isVector() ? VT.changeVectorElementTypeToInteger() : XLenVT; SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src); Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated); - MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); + MVT SetccVT = VT.isVector() ? MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()) : XLenVT; if (Op.getOpcode() == ISD::FCEIL) { // If the truncated value is the greater than or equal to the original // value, we've computed the ceil. Otherwise, we went the wrong way and // need to increase by 1. - // FIXME: This should use a masked operation. Handle here or in isel? + // FIXME: For vectors, this should use a masked operation. Handle here or + // in isel? SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated, DAG.getConstantFP(1.0, DL, VT)); SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT); @@ -1849,7 +1857,8 @@ // If the truncated value is the less than or equal to the original value, // we've computed the floor. Otherwise, we went the wrong way and need to // decrease by 1. - // FIXME: This should use a masked operation. Handle here or in isel? + // FIXME: For vectors, this should use a masked operation. Handle here or + // in isel? 
SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated, DAG.getConstantFP(1.0, DL, VT)); SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT); @@ -3417,7 +3426,7 @@ case ISD::FTRUNC: case ISD::FCEIL: case ISD::FFLOOR: - return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG); + return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG, Subtarget); case ISD::FROUND: return lowerFROUND(Op, DAG); case ISD::VECREDUCE_ADD: Index: llvm/test/CodeGen/RISCV/double-intrinsics.ll =================================================================== --- llvm/test/CodeGen/RISCV/double-intrinsics.ll +++ llvm/test/CodeGen/RISCV/double-intrinsics.ll @@ -724,11 +724,23 @@ ; ; RV64IFD-LABEL: floor_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: call floor@plt -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: flt.d a0, fa0, ft0 +; RV64IFD-NEXT: beqz a0, .LBB17_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI17_0)(a0) +; RV64IFD-NEXT: fsub.d ft0, ft0, ft1 +; RV64IFD-NEXT: .LBB17_2: +; RV64IFD-NEXT: lui a0, %hi(.LCPI17_1) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI17_1)(a0) +; RV64IFD-NEXT: fabs.d ft2, fa0 +; RV64IFD-NEXT: flt.d a0, ft2, ft1 +; RV64IFD-NEXT: beqz a0, .LBB17_4 +; RV64IFD-NEXT: # %bb.3: +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB17_4: ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: floor_f64: @@ -766,11 +778,23 @@ ; ; RV64IFD-LABEL: ceil_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: call ceil@plt -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: flt.d a0, ft0, fa0 +; RV64IFD-NEXT: beqz a0, .LBB18_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: lui a0, %hi(.LCPI18_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI18_0)(a0) +; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV64IFD-NEXT: .LBB18_2: +; RV64IFD-NEXT: lui a0, %hi(.LCPI18_1) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI18_1)(a0) +; RV64IFD-NEXT: fabs.d ft2, fa0 +; RV64IFD-NEXT: flt.d a0, ft2, ft1 +; RV64IFD-NEXT: beqz a0, .LBB18_4 +; RV64IFD-NEXT: # %bb.3: +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB18_4: ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: ceil_f64: @@ -808,11 +832,16 @@ ; ; RV64IFD-LABEL: trunc_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: call trunc@plt -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI19_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB19_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB19_2: ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: trunc_f64: Index: llvm/test/CodeGen/RISCV/double-round-conv.ll =================================================================== --- llvm/test/CodeGen/RISCV/double-round-conv.ll +++ llvm/test/CodeGen/RISCV/double-round-conv.ll @@ -638,13 +638,23 @@ ; ; RV64IFD-LABEL: test_floor_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: .cfi_def_cfa_offset 16 -; RV64IFD-NEXT: sd ra, 
8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: .cfi_offset ra, -8 -; RV64IFD-NEXT: call floor@plt -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: flt.d a0, fa0, ft0 +; RV64IFD-NEXT: beqz a0, .LBB40_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: lui a0, %hi(.LCPI40_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI40_0)(a0) +; RV64IFD-NEXT: fsub.d ft0, ft0, ft1 +; RV64IFD-NEXT: .LBB40_2: +; RV64IFD-NEXT: lui a0, %hi(.LCPI40_1) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI40_1)(a0) +; RV64IFD-NEXT: fabs.d ft2, fa0 +; RV64IFD-NEXT: flt.d a0, ft2, ft1 +; RV64IFD-NEXT: beqz a0, .LBB40_4 +; RV64IFD-NEXT: # %bb.3: +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB40_4: ; RV64IFD-NEXT: ret %a = call double @llvm.floor.f64(double %x) ret double %a @@ -664,13 +674,23 @@ ; ; RV64IFD-LABEL: test_ceil_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: .cfi_def_cfa_offset 16 -; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: .cfi_offset ra, -8 -; RV64IFD-NEXT: call ceil@plt -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: flt.d a0, ft0, fa0 +; RV64IFD-NEXT: beqz a0, .LBB41_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: lui a0, %hi(.LCPI41_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI41_0)(a0) +; RV64IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV64IFD-NEXT: .LBB41_2: +; RV64IFD-NEXT: lui a0, %hi(.LCPI41_1) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI41_1)(a0) +; RV64IFD-NEXT: fabs.d ft2, fa0 +; RV64IFD-NEXT: flt.d a0, ft2, ft1 +; RV64IFD-NEXT: beqz a0, .LBB41_4 +; RV64IFD-NEXT: # %bb.3: +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB41_4: ; RV64IFD-NEXT: ret %a = call double @llvm.ceil.f64(double %x) ret double %a @@ -690,13 +710,16 @@ ; ; RV64IFD-LABEL: test_trunc_double: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: .cfi_def_cfa_offset 16 -; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: .cfi_offset ra, -8 -; RV64IFD-NEXT: call trunc@plt -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: lui a0, %hi(.LCPI42_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI42_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB42_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB42_2: ; RV64IFD-NEXT: ret %a = call double @llvm.trunc.f64(double %x) ret double %a Index: llvm/test/CodeGen/RISCV/float-intrinsics.ll =================================================================== --- llvm/test/CodeGen/RISCV/float-intrinsics.ll +++ llvm/test/CodeGen/RISCV/float-intrinsics.ll @@ -725,20 +725,44 @@ define float @floor_f32(float %a) nounwind { ; RV32IF-LABEL: floor_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call floorf@plt -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, fa0, ft0 +; RV32IF-NEXT: beqz a0, .LBB17_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI17_0)(a0) +; RV32IF-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB17_2: +; RV32IF-NEXT: lui a0, 
%hi(.LCPI17_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI17_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB17_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB17_4: ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: floor_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: addi sp, sp, -16 -; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: call floorf@plt -; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: flt.s a0, fa0, ft0 +; RV64IF-NEXT: beqz a0, .LBB17_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI17_0)(a0) +; RV64IF-NEXT: fsub.s ft0, ft0, ft1 +; RV64IF-NEXT: .LBB17_2: +; RV64IF-NEXT: lui a0, %hi(.LCPI17_1) +; RV64IF-NEXT: flw ft1, %lo(.LCPI17_1)(a0) +; RV64IF-NEXT: fabs.s ft2, fa0 +; RV64IF-NEXT: flt.s a0, ft2, ft1 +; RV64IF-NEXT: beqz a0, .LBB17_4 +; RV64IF-NEXT: # %bb.3: +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB17_4: ; RV64IF-NEXT: ret ; ; RV32I-LABEL: floor_f32: @@ -767,20 +791,44 @@ define float @ceil_f32(float %a) nounwind { ; RV32IF-LABEL: ceil_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call ceilf@plt -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: beqz a0, .LBB18_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI18_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI18_0)(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB18_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI18_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI18_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB18_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB18_4: ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: ceil_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: addi sp, sp, -16 -; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: call ceilf@plt -; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: flt.s a0, ft0, fa0 +; RV64IF-NEXT: beqz a0, .LBB18_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: lui a0, %hi(.LCPI18_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI18_0)(a0) +; RV64IF-NEXT: fadd.s ft0, ft0, ft1 +; RV64IF-NEXT: .LBB18_2: +; RV64IF-NEXT: lui a0, %hi(.LCPI18_1) +; RV64IF-NEXT: flw ft1, %lo(.LCPI18_1)(a0) +; RV64IF-NEXT: fabs.s ft2, fa0 +; RV64IF-NEXT: flt.s a0, ft2, ft1 +; RV64IF-NEXT: beqz a0, .LBB18_4 +; RV64IF-NEXT: # %bb.3: +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB18_4: ; RV64IF-NEXT: ret ; ; RV32I-LABEL: ceil_f32: @@ -809,20 +857,30 @@ define float @trunc_f32(float %a) nounwind { ; RV32IF-LABEL: trunc_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call truncf@plt -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB19_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: 
fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB19_2: ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: trunc_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: addi sp, sp, -16 -; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: call truncf@plt -; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB19_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB19_2: ; RV64IF-NEXT: ret ; ; RV32I-LABEL: trunc_f32: Index: llvm/test/CodeGen/RISCV/float-round-conv-sat.ll =================================================================== --- llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -25,55 +25,72 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call floorf@plt -; RV32IF-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI1_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 -; RV32IF-NEXT: call __fixsfdi@plt -; RV32IF-NEXT: mv a2, a0 -; RV32IF-NEXT: bnez s0, .LBB1_2 +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, fa0, ft0 +; RV32IF-NEXT: beqz a0, .LBB1_2 ; RV32IF-NEXT: # %bb.1: -; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI1_0)(a0) +; RV32IF-NEXT: fsub.s ft0, ft0, ft1 ; RV32IF-NEXT: .LBB1_2: ; RV32IF-NEXT: lui a0, %hi(.LCPI1_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI1_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fs0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB1_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB1_4: +; RV32IF-NEXT: lui a0, %hi(.LCPI1_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI1_2)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB1_6 +; RV32IF-NEXT: # %bb.5: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB1_6: +; RV32IF-NEXT: lui a0, %hi(.LCPI1_3) +; RV32IF-NEXT: flw ft0, %lo(.LCPI1_3)(a0) ; RV32IF-NEXT: flt.s a3, ft0, fs0 ; RV32IF-NEXT: li a0, -1 -; RV32IF-NEXT: beqz a3, .LBB1_9 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a3, .LBB1_13 +; RV32IF-NEXT: # %bb.7: ; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: beqz a2, .LBB1_10 -; RV32IF-NEXT: .LBB1_4: +; RV32IF-NEXT: beqz a2, .LBB1_14 +; RV32IF-NEXT: .LBB1_8: ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: beqz s0, .LBB1_11 -; RV32IF-NEXT: .LBB1_5: -; RV32IF-NEXT: bnez a3, .LBB1_12 -; RV32IF-NEXT: .LBB1_6: -; RV32IF-NEXT: bnez a2, .LBB1_8 -; RV32IF-NEXT: .LBB1_7: +; RV32IF-NEXT: beqz s0, .LBB1_15 +; RV32IF-NEXT: .LBB1_9: +; RV32IF-NEXT: bnez a3, .LBB1_16 +; RV32IF-NEXT: .LBB1_10: +; RV32IF-NEXT: bnez a2, .LBB1_12 +; RV32IF-NEXT: .LBB1_11: ; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: .LBB1_8: +; RV32IF-NEXT: .LBB1_12: ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB1_9: +; RV32IF-NEXT: .LBB1_13: ; RV32IF-NEXT: mv a0, a2 ; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: bnez a2, .LBB1_4 
-; RV32IF-NEXT: .LBB1_10: +; RV32IF-NEXT: bnez a2, .LBB1_8 +; RV32IF-NEXT: .LBB1_14: ; RV32IF-NEXT: li a0, 0 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: bnez s0, .LBB1_5 -; RV32IF-NEXT: .LBB1_11: +; RV32IF-NEXT: bnez s0, .LBB1_9 +; RV32IF-NEXT: .LBB1_15: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: beqz a3, .LBB1_6 -; RV32IF-NEXT: .LBB1_12: +; RV32IF-NEXT: beqz a3, .LBB1_10 +; RV32IF-NEXT: .LBB1_16: ; RV32IF-NEXT: addi a1, a4, -1 -; RV32IF-NEXT: beqz a2, .LBB1_7 -; RV32IF-NEXT: j .LBB1_8 +; RV32IF-NEXT: beqz a2, .LBB1_11 +; RV32IF-NEXT: j .LBB1_12 ; ; RV64IF-LABEL: test_floor_si64: ; RV64IF: # %bb.0: @@ -109,42 +126,59 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call floorf@plt ; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, fa0, ft0 +; RV32IF-NEXT: beqz a0, .LBB3_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI3_0)(a0) +; RV32IF-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB3_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI3_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI3_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fs0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB3_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB3_4: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bnez s0, .LBB3_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB3_6 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: li a3, 0 -; RV32IF-NEXT: .LBB3_2: -; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IF-NEXT: .LBB3_6: +; RV32IF-NEXT: lui a0, %hi(.LCPI3_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI3_2)(a0) ; RV32IF-NEXT: flt.s a4, ft0, fs0 ; RV32IF-NEXT: li a2, -1 ; RV32IF-NEXT: li a0, -1 -; RV32IF-NEXT: beqz a4, .LBB3_7 -; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: beqz s0, .LBB3_8 -; RV32IF-NEXT: .LBB3_4: -; RV32IF-NEXT: bnez a4, .LBB3_6 -; RV32IF-NEXT: .LBB3_5: +; RV32IF-NEXT: beqz a4, .LBB3_11 +; RV32IF-NEXT: # %bb.7: +; RV32IF-NEXT: beqz s0, .LBB3_12 +; RV32IF-NEXT: .LBB3_8: +; RV32IF-NEXT: bnez a4, .LBB3_10 +; RV32IF-NEXT: .LBB3_9: ; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: .LBB3_6: +; RV32IF-NEXT: .LBB3_10: ; RV32IF-NEXT: mv a1, a2 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB3_7: +; RV32IF-NEXT: .LBB3_11: ; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: bnez s0, .LBB3_4 -; RV32IF-NEXT: .LBB3_8: +; RV32IF-NEXT: bnez s0, .LBB3_8 +; RV32IF-NEXT: .LBB3_12: ; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: beqz a4, .LBB3_5 -; RV32IF-NEXT: j .LBB3_6 +; RV32IF-NEXT: beqz a4, .LBB3_9 +; RV32IF-NEXT: j .LBB3_10 ; ; RV64IF-LABEL: test_floor_ui64: ; RV64IF: # %bb.0: @@ -180,55 +214,72 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call ceilf@plt -; RV32IF-NEXT: lui a0, %hi(.LCPI5_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI5_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 -; RV32IF-NEXT: call __fixsfdi@plt -; RV32IF-NEXT: mv a2, a0 -; RV32IF-NEXT: bnez s0, .LBB5_2 +; 
RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: beqz a0, .LBB5_2 ; RV32IF-NEXT: # %bb.1: -; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI5_0)(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 ; RV32IF-NEXT: .LBB5_2: ; RV32IF-NEXT: lui a0, %hi(.LCPI5_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI5_1)(a0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI5_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fs0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB5_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB5_4: +; RV32IF-NEXT: lui a0, %hi(.LCPI5_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI5_2)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB5_6 +; RV32IF-NEXT: # %bb.5: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB5_6: +; RV32IF-NEXT: lui a0, %hi(.LCPI5_3) +; RV32IF-NEXT: flw ft0, %lo(.LCPI5_3)(a0) ; RV32IF-NEXT: flt.s a3, ft0, fs0 ; RV32IF-NEXT: li a0, -1 -; RV32IF-NEXT: beqz a3, .LBB5_9 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a3, .LBB5_13 +; RV32IF-NEXT: # %bb.7: ; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: beqz a2, .LBB5_10 -; RV32IF-NEXT: .LBB5_4: +; RV32IF-NEXT: beqz a2, .LBB5_14 +; RV32IF-NEXT: .LBB5_8: ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: beqz s0, .LBB5_11 -; RV32IF-NEXT: .LBB5_5: -; RV32IF-NEXT: bnez a3, .LBB5_12 -; RV32IF-NEXT: .LBB5_6: -; RV32IF-NEXT: bnez a2, .LBB5_8 -; RV32IF-NEXT: .LBB5_7: +; RV32IF-NEXT: beqz s0, .LBB5_15 +; RV32IF-NEXT: .LBB5_9: +; RV32IF-NEXT: bnez a3, .LBB5_16 +; RV32IF-NEXT: .LBB5_10: +; RV32IF-NEXT: bnez a2, .LBB5_12 +; RV32IF-NEXT: .LBB5_11: ; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: .LBB5_8: +; RV32IF-NEXT: .LBB5_12: ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB5_9: +; RV32IF-NEXT: .LBB5_13: ; RV32IF-NEXT: mv a0, a2 ; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: bnez a2, .LBB5_4 -; RV32IF-NEXT: .LBB5_10: +; RV32IF-NEXT: bnez a2, .LBB5_8 +; RV32IF-NEXT: .LBB5_14: ; RV32IF-NEXT: li a0, 0 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: bnez s0, .LBB5_5 -; RV32IF-NEXT: .LBB5_11: +; RV32IF-NEXT: bnez s0, .LBB5_9 +; RV32IF-NEXT: .LBB5_15: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: beqz a3, .LBB5_6 -; RV32IF-NEXT: .LBB5_12: +; RV32IF-NEXT: beqz a3, .LBB5_10 +; RV32IF-NEXT: .LBB5_16: ; RV32IF-NEXT: addi a1, a4, -1 -; RV32IF-NEXT: beqz a2, .LBB5_7 -; RV32IF-NEXT: j .LBB5_8 +; RV32IF-NEXT: beqz a2, .LBB5_11 +; RV32IF-NEXT: j .LBB5_12 ; ; RV64IF-LABEL: test_ceil_si64: ; RV64IF: # %bb.0: @@ -264,42 +315,59 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call ceilf@plt ; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: beqz a0, .LBB7_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB7_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI7_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI7_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fs0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB7_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s 
fs0, ft0, fs0 +; RV32IF-NEXT: .LBB7_4: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bnez s0, .LBB7_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB7_6 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: li a3, 0 -; RV32IF-NEXT: .LBB7_2: -; RV32IF-NEXT: lui a0, %hi(.LCPI7_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV32IF-NEXT: .LBB7_6: +; RV32IF-NEXT: lui a0, %hi(.LCPI7_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI7_2)(a0) ; RV32IF-NEXT: flt.s a4, ft0, fs0 ; RV32IF-NEXT: li a2, -1 ; RV32IF-NEXT: li a0, -1 -; RV32IF-NEXT: beqz a4, .LBB7_7 -; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: beqz s0, .LBB7_8 -; RV32IF-NEXT: .LBB7_4: -; RV32IF-NEXT: bnez a4, .LBB7_6 -; RV32IF-NEXT: .LBB7_5: +; RV32IF-NEXT: beqz a4, .LBB7_11 +; RV32IF-NEXT: # %bb.7: +; RV32IF-NEXT: beqz s0, .LBB7_12 +; RV32IF-NEXT: .LBB7_8: +; RV32IF-NEXT: bnez a4, .LBB7_10 +; RV32IF-NEXT: .LBB7_9: ; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: .LBB7_6: +; RV32IF-NEXT: .LBB7_10: ; RV32IF-NEXT: mv a1, a2 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB7_7: +; RV32IF-NEXT: .LBB7_11: ; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: bnez s0, .LBB7_4 -; RV32IF-NEXT: .LBB7_8: +; RV32IF-NEXT: bnez s0, .LBB7_8 +; RV32IF-NEXT: .LBB7_12: ; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: beqz a4, .LBB7_5 -; RV32IF-NEXT: j .LBB7_6 +; RV32IF-NEXT: beqz a4, .LBB7_9 +; RV32IF-NEXT: j .LBB7_10 ; ; RV64IF-LABEL: test_ceil_ui64: ; RV64IF: # %bb.0: @@ -335,55 +403,65 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call truncf@plt ; RV32IF-NEXT: lui a0, %hi(.LCPI9_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI9_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 -; RV32IF-NEXT: call __fixsfdi@plt -; RV32IF-NEXT: mv a2, a0 -; RV32IF-NEXT: bnez s0, .LBB9_2 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB9_2 ; RV32IF-NEXT: # %bb.1: -; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: fcvt.w.s a0, fs0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 ; RV32IF-NEXT: .LBB9_2: ; RV32IF-NEXT: lui a0, %hi(.LCPI9_1) ; RV32IF-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB9_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB9_4: +; RV32IF-NEXT: lui a0, %hi(.LCPI9_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI9_2)(a0) ; RV32IF-NEXT: flt.s a3, ft0, fs0 ; RV32IF-NEXT: li a0, -1 -; RV32IF-NEXT: beqz a3, .LBB9_9 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a3, .LBB9_11 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: beqz a2, .LBB9_10 -; RV32IF-NEXT: .LBB9_4: -; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: beqz s0, .LBB9_11 -; RV32IF-NEXT: .LBB9_5: -; RV32IF-NEXT: bnez a3, .LBB9_12 +; RV32IF-NEXT: beqz a2, .LBB9_12 ; RV32IF-NEXT: .LBB9_6: -; RV32IF-NEXT: bnez a2, .LBB9_8 +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: beqz s0, .LBB9_13 ; RV32IF-NEXT: .LBB9_7: -; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: bnez a3, .LBB9_14 ; RV32IF-NEXT: .LBB9_8: +; RV32IF-NEXT: bnez a2, .LBB9_10 +; RV32IF-NEXT: .LBB9_9: 
+; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: .LBB9_10: ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB9_9: +; RV32IF-NEXT: .LBB9_11: ; RV32IF-NEXT: mv a0, a2 ; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: bnez a2, .LBB9_4 -; RV32IF-NEXT: .LBB9_10: +; RV32IF-NEXT: bnez a2, .LBB9_6 +; RV32IF-NEXT: .LBB9_12: ; RV32IF-NEXT: li a0, 0 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: bnez s0, .LBB9_5 -; RV32IF-NEXT: .LBB9_11: +; RV32IF-NEXT: bnez s0, .LBB9_7 +; RV32IF-NEXT: .LBB9_13: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: beqz a3, .LBB9_6 -; RV32IF-NEXT: .LBB9_12: +; RV32IF-NEXT: beqz a3, .LBB9_8 +; RV32IF-NEXT: .LBB9_14: ; RV32IF-NEXT: addi a1, a4, -1 -; RV32IF-NEXT: beqz a2, .LBB9_7 -; RV32IF-NEXT: j .LBB9_8 +; RV32IF-NEXT: beqz a2, .LBB9_9 +; RV32IF-NEXT: j .LBB9_10 ; ; RV64IF-LABEL: test_trunc_si64: ; RV64IF: # %bb.0: @@ -419,42 +497,52 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB11_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB11_2: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bnez s0, .LBB11_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB11_4 +; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: li a3, 0 -; RV32IF-NEXT: .LBB11_2: -; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IF-NEXT: .LBB11_4: +; RV32IF-NEXT: lui a0, %hi(.LCPI11_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_1)(a0) ; RV32IF-NEXT: flt.s a4, ft0, fs0 ; RV32IF-NEXT: li a2, -1 ; RV32IF-NEXT: li a0, -1 -; RV32IF-NEXT: beqz a4, .LBB11_7 -; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: beqz s0, .LBB11_8 -; RV32IF-NEXT: .LBB11_4: -; RV32IF-NEXT: bnez a4, .LBB11_6 -; RV32IF-NEXT: .LBB11_5: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: beqz a4, .LBB11_9 +; RV32IF-NEXT: # %bb.5: +; RV32IF-NEXT: beqz s0, .LBB11_10 ; RV32IF-NEXT: .LBB11_6: +; RV32IF-NEXT: bnez a4, .LBB11_8 +; RV32IF-NEXT: .LBB11_7: +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB11_8: ; RV32IF-NEXT: mv a1, a2 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB11_7: +; RV32IF-NEXT: .LBB11_9: ; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: bnez s0, .LBB11_4 -; RV32IF-NEXT: .LBB11_8: +; RV32IF-NEXT: bnez s0, .LBB11_6 +; RV32IF-NEXT: .LBB11_10: ; RV32IF-NEXT: li a1, 0 -; RV32IF-NEXT: beqz a4, .LBB11_5 -; RV32IF-NEXT: j .LBB11_6 +; RV32IF-NEXT: beqz a4, .LBB11_7 +; RV32IF-NEXT: j .LBB11_8 ; ; RV64IF-LABEL: test_trunc_ui64: ; RV64IF: # %bb.0: Index: llvm/test/CodeGen/RISCV/float-round-conv.ll =================================================================== --- llvm/test/CodeGen/RISCV/float-round-conv.ll +++ llvm/test/CodeGen/RISCV/float-round-conv.ll @@ -56,7 +56,23 @@ ; RV32IF-NEXT: 
.cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call floorf@plt +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, fa0, ft0 +; RV32IF-NEXT: beqz a0, .LBB3_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI3_0)(a0) +; RV32IF-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB3_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI3_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI3_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB3_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB3_4: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -123,7 +139,23 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call floorf@plt +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, fa0, ft0 +; RV32IF-NEXT: beqz a0, .LBB7_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI7_0)(a0) +; RV32IF-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB7_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI7_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI7_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB7_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB7_4: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -190,7 +222,23 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: beqz a0, .LBB11_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB11_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI11_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI11_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB11_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB11_4: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -257,7 +305,23 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: beqz a0, .LBB15_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI15_0)(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB15_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI15_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI15_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB15_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB15_4: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -324,7 +388,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 
4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB19_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB19_2: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -391,7 +464,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI23_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB23_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB23_2: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -698,24 +780,44 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_floor_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: .cfi_def_cfa_offset 16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call floorf@plt -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, fa0, ft0 +; RV32IF-NEXT: beqz a0, .LBB40_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI40_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI40_0)(a0) +; RV32IF-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB40_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI40_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI40_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB40_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB40_4: ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_floor_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: addi sp, sp, -16 -; RV64IF-NEXT: .cfi_def_cfa_offset 16 -; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: .cfi_offset ra, -8 -; RV64IF-NEXT: call floorf@plt -; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: flt.s a0, fa0, ft0 +; RV64IF-NEXT: beqz a0, .LBB40_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: lui a0, %hi(.LCPI40_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI40_0)(a0) +; RV64IF-NEXT: fsub.s ft0, ft0, ft1 +; RV64IF-NEXT: .LBB40_2: +; RV64IF-NEXT: lui a0, %hi(.LCPI40_1) +; RV64IF-NEXT: flw ft1, %lo(.LCPI40_1)(a0) +; RV64IF-NEXT: fabs.s ft2, fa0 +; RV64IF-NEXT: flt.s a0, ft2, ft1 +; RV64IF-NEXT: beqz a0, .LBB40_4 +; RV64IF-NEXT: # %bb.3: +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB40_4: ; RV64IF-NEXT: ret %a = call float @llvm.floor.f32(float %x) ret float %a @@ -745,24 +847,44 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_ceil_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: .cfi_def_cfa_offset 16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call ceilf@plt -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz 
+; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: beqz a0, .LBB41_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI41_0) +; RV32IF-NEXT: flw ft1, %lo(.LCPI41_0)(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: .LBB41_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI41_1) +; RV32IF-NEXT: flw ft1, %lo(.LCPI41_1)(a0) +; RV32IF-NEXT: fabs.s ft2, fa0 +; RV32IF-NEXT: flt.s a0, ft2, ft1 +; RV32IF-NEXT: beqz a0, .LBB41_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB41_4: ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_ceil_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: addi sp, sp, -16 -; RV64IF-NEXT: .cfi_def_cfa_offset 16 -; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: .cfi_offset ra, -8 -; RV64IF-NEXT: call ceilf@plt -; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: flt.s a0, ft0, fa0 +; RV64IF-NEXT: beqz a0, .LBB41_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: lui a0, %hi(.LCPI41_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI41_0)(a0) +; RV64IF-NEXT: fadd.s ft0, ft0, ft1 +; RV64IF-NEXT: .LBB41_2: +; RV64IF-NEXT: lui a0, %hi(.LCPI41_1) +; RV64IF-NEXT: flw ft1, %lo(.LCPI41_1)(a0) +; RV64IF-NEXT: fabs.s ft2, fa0 +; RV64IF-NEXT: flt.s a0, ft2, ft1 +; RV64IF-NEXT: beqz a0, .LBB41_4 +; RV64IF-NEXT: # %bb.3: +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB41_4: ; RV64IF-NEXT: ret %a = call float @llvm.ceil.f32(float %x) ret float %a @@ -792,24 +914,30 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_trunc_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: .cfi_def_cfa_offset 16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call truncf@plt -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: lui a0, %hi(.LCPI42_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI42_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB42_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB42_2: ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_trunc_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: addi sp, sp, -16 -; RV64IF-NEXT: .cfi_def_cfa_offset 16 -; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: .cfi_offset ra, -8 -; RV64IF-NEXT: call truncf@plt -; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: lui a0, %hi(.LCPI42_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI42_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB42_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB42_2: ; RV64IF-NEXT: ret %a = call float @llvm.trunc.f32(float %x) ret float %a Index: llvm/test/CodeGen/RISCV/half-intrinsics.ll =================================================================== --- llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -1370,46 +1370,94 @@ define half @floor_f16(half %a) nounwind { ; RV32IZFH-LABEL: floor_f16: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 
-; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFH-NEXT: fcvt.s.w ft1, a0 +; RV32IZFH-NEXT: flt.s a0, ft0, ft1 +; RV32IZFH-NEXT: beqz a0, .LBB17_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IZFH-NEXT: flw ft2, %lo(.LCPI17_0)(a0) +; RV32IZFH-NEXT: fsub.s ft1, ft1, ft2 +; RV32IZFH-NEXT: .LBB17_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IZFH-NEXT: flw ft2, %lo(.LCPI17_1)(a0) +; RV32IZFH-NEXT: fabs.s ft3, ft0 +; RV32IZFH-NEXT: flt.s a0, ft3, ft2 +; RV32IZFH-NEXT: beqz a0, .LBB17_4 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFH-NEXT: .LBB17_4: +; RV32IZFH-NEXT: fcvt.h.s fa0, ft0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: floor_f16: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call floorf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFH-NEXT: fcvt.s.l ft1, a0 +; RV64IZFH-NEXT: flt.s a0, ft0, ft1 +; RV64IZFH-NEXT: beqz a0, .LBB17_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IZFH-NEXT: flw ft2, %lo(.LCPI17_0)(a0) +; RV64IZFH-NEXT: fsub.s ft1, ft1, ft2 +; RV64IZFH-NEXT: .LBB17_2: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI17_1) +; RV64IZFH-NEXT: flw ft2, %lo(.LCPI17_1)(a0) +; RV64IZFH-NEXT: fabs.s ft3, ft0 +; RV64IZFH-NEXT: flt.s a0, ft3, ft2 +; RV64IZFH-NEXT: beqz a0, .LBB17_4 +; RV64IZFH-NEXT: # %bb.3: +; RV64IZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFH-NEXT: .LBB17_4: +; RV64IZFH-NEXT: fcvt.h.s fa0, ft0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: floor_f16: ; RV32IDZFH: # %bb.0: -; RV32IDZFH-NEXT: addi sp, sp, -16 -; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IDZFH-NEXT: call floorf@plt -; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IDZFH-NEXT: fcvt.s.w ft1, a0 +; RV32IDZFH-NEXT: flt.s a0, ft0, ft1 +; RV32IDZFH-NEXT: beqz a0, .LBB17_2 +; RV32IDZFH-NEXT: # %bb.1: +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI17_0)(a0) +; RV32IDZFH-NEXT: fsub.s ft1, ft1, ft2 +; RV32IDZFH-NEXT: .LBB17_2: +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI17_1)(a0) +; RV32IDZFH-NEXT: fabs.s ft3, ft0 +; RV32IDZFH-NEXT: flt.s a0, ft3, ft2 +; RV32IDZFH-NEXT: beqz a0, .LBB17_4 +; RV32IDZFH-NEXT: # %bb.3: +; RV32IDZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IDZFH-NEXT: .LBB17_4: +; RV32IDZFH-NEXT: fcvt.h.s fa0, ft0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: floor_f16: ; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: addi sp, sp, -16 -; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IDZFH-NEXT: call floorf@plt -; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IDZFH-NEXT: addi sp, sp, 16 +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IDZFH-NEXT: fcvt.s.l ft1, a0 +; RV64IDZFH-NEXT: flt.s a0, ft0, ft1 +; RV64IDZFH-NEXT: beqz a0, .LBB17_2 +; RV64IDZFH-NEXT: # %bb.1: +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IDZFH-NEXT: flw ft2, 
%lo(.LCPI17_0)(a0) +; RV64IDZFH-NEXT: fsub.s ft1, ft1, ft2 +; RV64IDZFH-NEXT: .LBB17_2: +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI17_1) +; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI17_1)(a0) +; RV64IDZFH-NEXT: fabs.s ft3, ft0 +; RV64IDZFH-NEXT: flt.s a0, ft3, ft2 +; RV64IDZFH-NEXT: beqz a0, .LBB17_4 +; RV64IDZFH-NEXT: # %bb.3: +; RV64IDZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IDZFH-NEXT: .LBB17_4: +; RV64IDZFH-NEXT: fcvt.h.s fa0, ft0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: floor_f16: @@ -1446,46 +1494,94 @@ define half @ceil_f16(half %a) nounwind { ; RV32IZFH-LABEL: ceil_f16: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call ceilf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFH-NEXT: fcvt.s.w ft1, a0 +; RV32IZFH-NEXT: flt.s a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB18_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32IZFH-NEXT: flw ft2, %lo(.LCPI18_0)(a0) +; RV32IZFH-NEXT: fadd.s ft1, ft1, ft2 +; RV32IZFH-NEXT: .LBB18_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI18_1) +; RV32IZFH-NEXT: flw ft2, %lo(.LCPI18_1)(a0) +; RV32IZFH-NEXT: fabs.s ft3, ft0 +; RV32IZFH-NEXT: flt.s a0, ft3, ft2 +; RV32IZFH-NEXT: beqz a0, .LBB18_4 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFH-NEXT: .LBB18_4: +; RV32IZFH-NEXT: fcvt.h.s fa0, ft0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: ceil_f16: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call ceilf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFH-NEXT: fcvt.s.l ft1, a0 +; RV64IZFH-NEXT: flt.s a0, ft1, ft0 +; RV64IZFH-NEXT: beqz a0, .LBB18_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV64IZFH-NEXT: flw ft2, %lo(.LCPI18_0)(a0) +; RV64IZFH-NEXT: fadd.s ft1, ft1, ft2 +; RV64IZFH-NEXT: .LBB18_2: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI18_1) +; RV64IZFH-NEXT: flw ft2, %lo(.LCPI18_1)(a0) +; RV64IZFH-NEXT: fabs.s ft3, ft0 +; RV64IZFH-NEXT: flt.s a0, ft3, ft2 +; RV64IZFH-NEXT: beqz a0, .LBB18_4 +; RV64IZFH-NEXT: # %bb.3: +; RV64IZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFH-NEXT: .LBB18_4: +; RV64IZFH-NEXT: fcvt.h.s fa0, ft0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: ceil_f16: ; RV32IDZFH: # %bb.0: -; RV32IDZFH-NEXT: addi sp, sp, -16 -; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IDZFH-NEXT: call ceilf@plt -; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IDZFH-NEXT: fcvt.s.w ft1, a0 +; RV32IDZFH-NEXT: flt.s a0, ft1, ft0 +; RV32IDZFH-NEXT: beqz a0, .LBB18_2 +; RV32IDZFH-NEXT: # %bb.1: +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI18_0)(a0) +; RV32IDZFH-NEXT: fadd.s ft1, ft1, ft2 +; RV32IDZFH-NEXT: .LBB18_2: +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI18_1) +; RV32IDZFH-NEXT: flw ft2, %lo(.LCPI18_1)(a0) +; RV32IDZFH-NEXT: fabs.s ft3, ft0 +; RV32IDZFH-NEXT: flt.s a0, ft3, ft2 +; RV32IDZFH-NEXT: beqz a0, .LBB18_4 +; 
RV32IDZFH-NEXT: # %bb.3: +; RV32IDZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IDZFH-NEXT: .LBB18_4: +; RV32IDZFH-NEXT: fcvt.h.s fa0, ft0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: ceil_f16: ; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: addi sp, sp, -16 -; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IDZFH-NEXT: call ceilf@plt -; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IDZFH-NEXT: addi sp, sp, 16 +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IDZFH-NEXT: fcvt.s.l ft1, a0 +; RV64IDZFH-NEXT: flt.s a0, ft1, ft0 +; RV64IDZFH-NEXT: beqz a0, .LBB18_2 +; RV64IDZFH-NEXT: # %bb.1: +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI18_0) +; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI18_0)(a0) +; RV64IDZFH-NEXT: fadd.s ft1, ft1, ft2 +; RV64IDZFH-NEXT: .LBB18_2: +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI18_1) +; RV64IDZFH-NEXT: flw ft2, %lo(.LCPI18_1)(a0) +; RV64IDZFH-NEXT: fabs.s ft3, ft0 +; RV64IDZFH-NEXT: flt.s a0, ft3, ft2 +; RV64IDZFH-NEXT: beqz a0, .LBB18_4 +; RV64IDZFH-NEXT: # %bb.3: +; RV64IDZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IDZFH-NEXT: .LBB18_4: +; RV64IDZFH-NEXT: fcvt.h.s fa0, ft0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: ceil_f16: @@ -1522,46 +1618,66 @@ define half @trunc_f16(half %a) nounwind { ; RV32IZFH-LABEL: trunc_f16: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call truncf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFH-NEXT: fabs.s ft2, ft0 +; RV32IZFH-NEXT: flt.s a0, ft2, ft1 +; RV32IZFH-NEXT: beqz a0, .LBB19_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFH-NEXT: fcvt.s.w ft1, a0 +; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFH-NEXT: .LBB19_2: +; RV32IZFH-NEXT: fcvt.h.s fa0, ft0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: trunc_f16: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call truncf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IZFH-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV64IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IZFH-NEXT: fabs.s ft2, ft0 +; RV64IZFH-NEXT: flt.s a0, ft2, ft1 +; RV64IZFH-NEXT: beqz a0, .LBB19_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IZFH-NEXT: fcvt.s.l ft1, a0 +; RV64IZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IZFH-NEXT: .LBB19_2: +; RV64IZFH-NEXT: fcvt.h.s fa0, ft0 ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: trunc_f16: ; RV32IDZFH: # %bb.0: -; RV32IDZFH-NEXT: addi sp, sp, -16 -; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IDZFH-NEXT: call truncf@plt -; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV32IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IDZFH-NEXT: fabs.s ft2, ft0 +; RV32IDZFH-NEXT: flt.s a0, ft2, ft1 +; RV32IDZFH-NEXT: beqz a0, .LBB19_2 +; RV32IDZFH-NEXT: # %bb.1: +; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz +; 
RV32IDZFH-NEXT: fcvt.s.w ft1, a0 +; RV32IDZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IDZFH-NEXT: .LBB19_2: +; RV32IDZFH-NEXT: fcvt.h.s fa0, ft0 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: trunc_f16: ; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: addi sp, sp, -16 -; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IDZFH-NEXT: call truncf@plt -; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IDZFH-NEXT: addi sp, sp, 16 +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI19_0)(a0) +; RV64IDZFH-NEXT: fcvt.s.h ft0, fa0 +; RV64IDZFH-NEXT: fabs.s ft2, ft0 +; RV64IDZFH-NEXT: flt.s a0, ft2, ft1 +; RV64IDZFH-NEXT: beqz a0, .LBB19_2 +; RV64IDZFH-NEXT: # %bb.1: +; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IDZFH-NEXT: fcvt.s.l ft1, a0 +; RV64IDZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV64IDZFH-NEXT: .LBB19_2: +; RV64IDZFH-NEXT: fcvt.h.s fa0, ft0 ; RV64IDZFH-NEXT: ret ; ; RV32I-LABEL: trunc_f16: Index: llvm/test/CodeGen/RISCV/half-round-conv-sat.ll =================================================================== --- llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -25,58 +25,74 @@ ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt +; RV32IZFH-NEXT: fcvt.s.h ft0, fa0 +; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IZFH-NEXT: fcvt.s.w ft1, a0 +; RV32IZFH-NEXT: flt.s a0, ft0, ft1 +; RV32IZFH-NEXT: beqz a0, .LBB1_2 +; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 -; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: flw ft2, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: fsub.s ft1, ft1, ft2 +; RV32IZFH-NEXT: .LBB1_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IZFH-NEXT: flw ft2, %lo(.LCPI1_1)(a0) +; RV32IZFH-NEXT: fabs.s ft3, ft0 +; RV32IZFH-NEXT: flt.s a0, ft3, ft2 +; RV32IZFH-NEXT: beqz a0, .LBB1_4 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0 +; RV32IZFH-NEXT: .LBB1_4: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_2) +; RV32IZFH-NEXT: flw ft1, %lo(.LCPI1_2)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft0, ft0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: fle.s s0, ft1, fs0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi@plt ; RV32IZFH-NEXT: mv a2, a0 -; RV32IZFH-NEXT: bnez s0, .LBB1_2 -; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: bnez s0, .LBB1_6 +; RV32IZFH-NEXT: # %bb.5: ; RV32IZFH-NEXT: li a2, 0 -; RV32IZFH-NEXT: .LBB1_2: -; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_1) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; RV32IZFH-NEXT: .LBB1_6: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_3) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_3)(a0) ; RV32IZFH-NEXT: flt.s a3, ft0, fs0 ; RV32IZFH-NEXT: li a0, -1 -; RV32IZFH-NEXT: beqz a3, .LBB1_9 -; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz a3, .LBB1_13 +; RV32IZFH-NEXT: # %bb.7: ; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: beqz a2, .LBB1_10 -; RV32IZFH-NEXT: .LBB1_4: +; RV32IZFH-NEXT: beqz a2, .LBB1_14 +; RV32IZFH-NEXT: .LBB1_8: ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB1_11 -; RV32IZFH-NEXT: .LBB1_5: -; RV32IZFH-NEXT: bnez a3, .LBB1_12 -; RV32IZFH-NEXT: .LBB1_6: -; RV32IZFH-NEXT: bnez a2, .LBB1_8 -; RV32IZFH-NEXT: .LBB1_7: +; RV32IZFH-NEXT: beqz s0, .LBB1_15 +; 
RV32IZFH-NEXT: .LBB1_9:
+; RV32IZFH-NEXT: bnez a3, .LBB1_16
+; RV32IZFH-NEXT: .LBB1_10:
+; RV32IZFH-NEXT: bnez a2, .LBB1_12
+; RV32IZFH-NEXT: .LBB1_11:
; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: .LBB1_8:
+; RV32IZFH-NEXT: .LBB1_12:
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB1_9:
+; RV32IZFH-NEXT: .LBB1_13:
; RV32IZFH-NEXT: mv a0, a2
; RV32IZFH-NEXT: feq.s a2, fs0, fs0
-; RV32IZFH-NEXT: bnez a2, .LBB1_4
-; RV32IZFH-NEXT: .LBB1_10:
+; RV32IZFH-NEXT: bnez a2, .LBB1_8
+; RV32IZFH-NEXT: .LBB1_14:
; RV32IZFH-NEXT: li a0, 0
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: bnez s0, .LBB1_5
-; RV32IZFH-NEXT: .LBB1_11:
+; RV32IZFH-NEXT: bnez s0, .LBB1_9
+; RV32IZFH-NEXT: .LBB1_15:
; RV32IZFH-NEXT: lui a1, 524288
-; RV32IZFH-NEXT: beqz a3, .LBB1_6
-; RV32IZFH-NEXT: .LBB1_12:
+; RV32IZFH-NEXT: beqz a3, .LBB1_10
+; RV32IZFH-NEXT: .LBB1_16:
; RV32IZFH-NEXT: addi a1, a4, -1
-; RV32IZFH-NEXT: beqz a2, .LBB1_7
-; RV32IZFH-NEXT: j .LBB1_8
+; RV32IZFH-NEXT: beqz a2, .LBB1_11
+; RV32IZFH-NEXT: j .LBB1_12
;
; RV64IZFH-LABEL: test_floor_si64:
; RV64IZFH: # %bb.0:
@@ -112,45 +128,61 @@
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call floorf@plt
-; RV32IZFH-NEXT: fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft0, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB3_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT: fsub.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB3_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI3_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB3_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB3_4:
+; RV32IZFH-NEXT: fcvt.h.s ft0, ft0
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s s0, ft0, fs0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: mv a3, a0
-; RV32IZFH-NEXT: bnez s0, .LBB3_2
-; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: bnez s0, .LBB3_6
+; RV32IZFH-NEXT: # %bb.5:
; RV32IZFH-NEXT: li a3, 0
-; RV32IZFH-NEXT: .LBB3_2:
-; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0)
-; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT: .LBB3_6:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_2)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_2)(a0)
; RV32IZFH-NEXT: flt.s a4, ft0, fs0
; RV32IZFH-NEXT: li a2, -1
; RV32IZFH-NEXT: li a0, -1
-; RV32IZFH-NEXT: beqz a4, .LBB3_7
-; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: beqz s0, .LBB3_8
-; RV32IZFH-NEXT: .LBB3_4:
-; RV32IZFH-NEXT: bnez a4, .LBB3_6
-; RV32IZFH-NEXT: .LBB3_5:
+; RV32IZFH-NEXT: beqz a4, .LBB3_11
+; RV32IZFH-NEXT: # %bb.7:
+; RV32IZFH-NEXT: beqz s0, .LBB3_12
+; RV32IZFH-NEXT: .LBB3_8:
+; RV32IZFH-NEXT: bnez a4, .LBB3_10
+; RV32IZFH-NEXT: .LBB3_9:
; RV32IZFH-NEXT: mv a2, a1
-; RV32IZFH-NEXT: .LBB3_6:
+; RV32IZFH-NEXT: .LBB3_10:
; RV32IZFH-NEXT: mv a1, a2
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB3_7:
+; RV32IZFH-NEXT: .LBB3_11:
; RV32IZFH-NEXT: mv a0, a3
-; RV32IZFH-NEXT: bnez s0, .LBB3_4
-; RV32IZFH-NEXT: .LBB3_8:
+; RV32IZFH-NEXT: bnez s0, .LBB3_8
+; RV32IZFH-NEXT: .LBB3_12:
; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: beqz a4, .LBB3_5
-; RV32IZFH-NEXT: j .LBB3_6
+; RV32IZFH-NEXT: beqz a4, .LBB3_9
+; RV32IZFH-NEXT: j .LBB3_10
;
; RV64IZFH-LABEL: test_floor_ui64:
; RV64IZFH: # %bb.0:
@@ -186,58 +218,74 @@
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call ceilf@plt
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft1, ft0
+; RV32IZFH-NEXT: beqz a0, .LBB5_2
+; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_0)
-; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
-; RV32IZFH-NEXT: fcvt.h.s ft1, fa0
-; RV32IZFH-NEXT: fcvt.s.h fs0, ft1
-; RV32IZFH-NEXT: fle.s s0, ft0, fs0
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI5_0)(a0)
+; RV32IZFH-NEXT: fadd.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB5_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI5_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB5_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB5_4:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_2)
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI5_2)(a0)
+; RV32IZFH-NEXT: fcvt.h.s ft0, ft0
+; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT: fle.s s0, ft1, fs0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi@plt
; RV32IZFH-NEXT: mv a2, a0
-; RV32IZFH-NEXT: bnez s0, .LBB5_2
-; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: bnez s0, .LBB5_6
+; RV32IZFH-NEXT: # %bb.5:
; RV32IZFH-NEXT: li a2, 0
-; RV32IZFH-NEXT: .LBB5_2:
-; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_1)
-; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_1)(a0)
+; RV32IZFH-NEXT: .LBB5_6:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_3)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_3)(a0)
; RV32IZFH-NEXT: flt.s a3, ft0, fs0
; RV32IZFH-NEXT: li a0, -1
-; RV32IZFH-NEXT: beqz a3, .LBB5_9
-; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: beqz a3, .LBB5_13
+; RV32IZFH-NEXT: # %bb.7:
; RV32IZFH-NEXT: feq.s a2, fs0, fs0
-; RV32IZFH-NEXT: beqz a2, .LBB5_10
-; RV32IZFH-NEXT: .LBB5_4:
+; RV32IZFH-NEXT: beqz a2, .LBB5_14
+; RV32IZFH-NEXT: .LBB5_8:
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB5_11
-; RV32IZFH-NEXT: .LBB5_5:
-; RV32IZFH-NEXT: bnez a3, .LBB5_12
-; RV32IZFH-NEXT: .LBB5_6:
-; RV32IZFH-NEXT: bnez a2, .LBB5_8
-; RV32IZFH-NEXT: .LBB5_7:
+; RV32IZFH-NEXT: beqz s0, .LBB5_15
+; RV32IZFH-NEXT: .LBB5_9:
+; RV32IZFH-NEXT: bnez a3, .LBB5_16
+; RV32IZFH-NEXT: .LBB5_10:
+; RV32IZFH-NEXT: bnez a2, .LBB5_12
+; RV32IZFH-NEXT: .LBB5_11:
; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: .LBB5_8:
+; RV32IZFH-NEXT: .LBB5_12:
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB5_9:
+; RV32IZFH-NEXT: .LBB5_13:
; RV32IZFH-NEXT: mv a0, a2
; RV32IZFH-NEXT: feq.s a2, fs0, fs0
-; RV32IZFH-NEXT: bnez a2, .LBB5_4
-; RV32IZFH-NEXT: .LBB5_10:
+; RV32IZFH-NEXT: bnez a2, .LBB5_8
+; RV32IZFH-NEXT: .LBB5_14:
; RV32IZFH-NEXT: li a0, 0
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: bnez s0, .LBB5_5
-; RV32IZFH-NEXT: .LBB5_11:
+; RV32IZFH-NEXT: bnez s0, .LBB5_9
+; RV32IZFH-NEXT: .LBB5_15:
; RV32IZFH-NEXT: lui a1, 524288
-; RV32IZFH-NEXT: beqz a3, .LBB5_6
-; RV32IZFH-NEXT: .LBB5_12:
+; RV32IZFH-NEXT: beqz a3, .LBB5_10
+; RV32IZFH-NEXT: .LBB5_16:
; RV32IZFH-NEXT: addi a1, a4, -1
-; RV32IZFH-NEXT: beqz a2, .LBB5_7
-; RV32IZFH-NEXT: j .LBB5_8
+; RV32IZFH-NEXT: beqz a2, .LBB5_11
+; RV32IZFH-NEXT: j .LBB5_12
;
; RV64IZFH-LABEL: test_ceil_si64:
; RV64IZFH: # %bb.0:
@@ -273,45 +321,61 @@
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call ceilf@plt
-; RV32IZFH-NEXT: fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft1, ft0
+; RV32IZFH-NEXT: beqz a0, .LBB7_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT: fadd.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB7_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI7_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB7_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB7_4:
+; RV32IZFH-NEXT: fcvt.h.s ft0, ft0
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s s0, ft0, fs0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: mv a3, a0
-; RV32IZFH-NEXT: bnez s0, .LBB7_2
-; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: bnez s0, .LBB7_6
+; RV32IZFH-NEXT: # %bb.5:
; RV32IZFH-NEXT: li a3, 0
-; RV32IZFH-NEXT: .LBB7_2:
-; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0)
-; RV32IZFH-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT: .LBB7_6:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_2)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI7_2)(a0)
; RV32IZFH-NEXT: flt.s a4, ft0, fs0
; RV32IZFH-NEXT: li a2, -1
; RV32IZFH-NEXT: li a0, -1
-; RV32IZFH-NEXT: beqz a4, .LBB7_7
-; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: beqz s0, .LBB7_8
-; RV32IZFH-NEXT: .LBB7_4:
-; RV32IZFH-NEXT: bnez a4, .LBB7_6
-; RV32IZFH-NEXT: .LBB7_5:
+; RV32IZFH-NEXT: beqz a4, .LBB7_11
+; RV32IZFH-NEXT: # %bb.7:
+; RV32IZFH-NEXT: beqz s0, .LBB7_12
+; RV32IZFH-NEXT: .LBB7_8:
+; RV32IZFH-NEXT: bnez a4, .LBB7_10
+; RV32IZFH-NEXT: .LBB7_9:
; RV32IZFH-NEXT: mv a2, a1
-; RV32IZFH-NEXT: .LBB7_6:
+; RV32IZFH-NEXT: .LBB7_10:
; RV32IZFH-NEXT: mv a1, a2
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB7_7:
+; RV32IZFH-NEXT: .LBB7_11:
; RV32IZFH-NEXT: mv a0, a3
-; RV32IZFH-NEXT: bnez s0, .LBB7_4
-; RV32IZFH-NEXT: .LBB7_8:
+; RV32IZFH-NEXT: bnez s0, .LBB7_8
+; RV32IZFH-NEXT: .LBB7_12:
; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: beqz a4, .LBB7_5
-; RV32IZFH-NEXT: j .LBB7_6
+; RV32IZFH-NEXT: beqz a4, .LBB7_9
+; RV32IZFH-NEXT: j .LBB7_10
;
; RV64IZFH-LABEL: test_ceil_ui64:
; RV64IZFH: # %bb.0:
@@ -347,58 +411,67 @@
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call truncf@plt
; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_0)
-; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_0)(a0)
-; RV32IZFH-NEXT: fcvt.h.s ft1, fa0
-; RV32IZFH-NEXT: fcvt.s.h fs0, ft1
-; RV32IZFH-NEXT: fle.s s0, ft0, fs0
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI9_0)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fabs.s ft2, ft0
+; RV32IZFH-NEXT: flt.s a0, ft2, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB9_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB9_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_1)
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI9_1)(a0)
+; RV32IZFH-NEXT: fcvt.h.s ft0, ft0
+; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT: fle.s s0, ft1, fs0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi@plt
; RV32IZFH-NEXT: mv a2, a0
-; RV32IZFH-NEXT: bnez s0, .LBB9_2
-; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: bnez s0, .LBB9_4
+; RV32IZFH-NEXT: # %bb.3:
; RV32IZFH-NEXT: li a2, 0
-; RV32IZFH-NEXT: .LBB9_2:
-; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_1)
-; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
+; RV32IZFH-NEXT: .LBB9_4:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_2)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_2)(a0)
; RV32IZFH-NEXT: flt.s a3, ft0, fs0
; RV32IZFH-NEXT: li a0, -1
-; RV32IZFH-NEXT: beqz a3, .LBB9_9
-; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: beqz a3, .LBB9_11
+; RV32IZFH-NEXT: # %bb.5:
; RV32IZFH-NEXT: feq.s a2, fs0, fs0
-; RV32IZFH-NEXT: beqz a2, .LBB9_10
-; RV32IZFH-NEXT: .LBB9_4:
-; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB9_11
-; RV32IZFH-NEXT: .LBB9_5:
-; RV32IZFH-NEXT: bnez a3, .LBB9_12
+; RV32IZFH-NEXT: beqz a2, .LBB9_12
; RV32IZFH-NEXT: .LBB9_6:
-; RV32IZFH-NEXT: bnez a2, .LBB9_8
+; RV32IZFH-NEXT: lui a4, 524288
+; RV32IZFH-NEXT: beqz s0, .LBB9_13
; RV32IZFH-NEXT: .LBB9_7:
-; RV32IZFH-NEXT: li a1, 0
+; RV32IZFH-NEXT: bnez a3, .LBB9_14
; RV32IZFH-NEXT: .LBB9_8:
+; RV32IZFH-NEXT: bnez a2, .LBB9_10
+; RV32IZFH-NEXT: .LBB9_9:
+; RV32IZFH-NEXT: li a1, 0
+; RV32IZFH-NEXT: .LBB9_10:
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB9_9:
+; RV32IZFH-NEXT: .LBB9_11:
; RV32IZFH-NEXT: mv a0, a2
; RV32IZFH-NEXT: feq.s a2, fs0, fs0
-; RV32IZFH-NEXT: bnez a2, .LBB9_4
-; RV32IZFH-NEXT: .LBB9_10:
+; RV32IZFH-NEXT: bnez a2, .LBB9_6
+; RV32IZFH-NEXT: .LBB9_12:
; RV32IZFH-NEXT: li a0, 0
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: bnez s0, .LBB9_5
-; RV32IZFH-NEXT: .LBB9_11:
+; RV32IZFH-NEXT: bnez s0, .LBB9_7
+; RV32IZFH-NEXT: .LBB9_13:
; RV32IZFH-NEXT: lui a1, 524288
-; RV32IZFH-NEXT: beqz a3, .LBB9_6
-; RV32IZFH-NEXT: .LBB9_12:
+; RV32IZFH-NEXT: beqz a3, .LBB9_8
+; RV32IZFH-NEXT: .LBB9_14:
; RV32IZFH-NEXT: addi a1, a4, -1
-; RV32IZFH-NEXT: beqz a2, .LBB9_7
-; RV32IZFH-NEXT: j .LBB9_8
+; RV32IZFH-NEXT: beqz a2, .LBB9_9
+; RV32IZFH-NEXT: j .LBB9_10
;
; RV64IZFH-LABEL: test_trunc_si64:
; RV64IZFH: # %bb.0:
@@ -434,45 +507,54 @@
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call truncf@plt
-; RV32IZFH-NEXT: fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0)
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fabs.s ft2, ft0
+; RV32IZFH-NEXT: flt.s a0, ft2, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB11_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB11_2:
+; RV32IZFH-NEXT: fcvt.h.s ft0, ft0
; RV32IZFH-NEXT: fcvt.s.h fs0, ft0
; RV32IZFH-NEXT: fmv.w.x ft0, zero
; RV32IZFH-NEXT: fle.s s0, ft0, fs0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi@plt
; RV32IZFH-NEXT: mv a3, a0
-; RV32IZFH-NEXT: bnez s0, .LBB11_2
-; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: bnez s0, .LBB11_4
+; RV32IZFH-NEXT: # %bb.3:
; RV32IZFH-NEXT: li a3, 0
-; RV32IZFH-NEXT: .LBB11_2:
-; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0)
-; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT: .LBB11_4:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_1)
+; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_1)(a0)
; RV32IZFH-NEXT: flt.s a4, ft0, fs0
; RV32IZFH-NEXT: li a2, -1
; RV32IZFH-NEXT: li a0, -1
-; RV32IZFH-NEXT: beqz a4, .LBB11_7
-; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: beqz s0, .LBB11_8
-; RV32IZFH-NEXT: .LBB11_4:
-; RV32IZFH-NEXT: bnez a4, .LBB11_6
-; RV32IZFH-NEXT: .LBB11_5:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: beqz a4, .LBB11_9
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: beqz s0, .LBB11_10
; RV32IZFH-NEXT: .LBB11_6:
+; RV32IZFH-NEXT: bnez a4, .LBB11_8
+; RV32IZFH-NEXT: .LBB11_7:
+; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: .LBB11_8:
; RV32IZFH-NEXT: mv a1, a2
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB11_7:
+; RV32IZFH-NEXT: .LBB11_9:
; RV32IZFH-NEXT: mv a0, a3
-; RV32IZFH-NEXT: bnez s0, .LBB11_4
-; RV32IZFH-NEXT: .LBB11_8:
+; RV32IZFH-NEXT: bnez s0, .LBB11_6
+; RV32IZFH-NEXT: .LBB11_10:
; RV32IZFH-NEXT: li a1, 0
-; RV32IZFH-NEXT: beqz a4, .LBB11_5
-; RV32IZFH-NEXT: j .LBB11_6
+; RV32IZFH-NEXT: beqz a4, .LBB11_7
+; RV32IZFH-NEXT: j .LBB11_8
;
; RV64IZFH-LABEL: test_trunc_ui64:
; RV64IZFH: # %bb.0:
Index: llvm/test/CodeGen/RISCV/half-round-conv.ll
===================================================================
--- llvm/test/CodeGen/RISCV/half-round-conv.ll
+++ llvm/test/CodeGen/RISCV/half-round-conv.ll
@@ -51,9 +51,25 @@
; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call floorf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft0, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB3_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT: fsub.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB3_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI3_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB3_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB3_4:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: call __fixhfdi@plt
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
@@ -115,9 +131,25 @@
; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call floorf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft0, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB7_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT: fsub.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB7_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI7_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB7_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB7_4:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: call __fixunshfdi@plt
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
@@ -179,9 +211,25 @@
; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call ceilf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft1, ft0
+; RV32IZFH-NEXT: beqz a0, .LBB11_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT: fadd.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB11_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI11_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB11_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB11_4:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: call __fixhfdi@plt
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
@@ -243,9 +291,25 @@
; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call ceilf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft1, ft0
+; RV32IZFH-NEXT: beqz a0, .LBB15_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI15_0)(a0)
+; RV32IZFH-NEXT: fadd.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB15_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI15_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB15_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB15_4:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: call __fixunshfdi@plt
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
@@ -307,9 +371,18 @@
; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call truncf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0)
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI19_0)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fabs.s ft2, ft0
+; RV32IZFH-NEXT: flt.s a0, ft2, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB19_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB19_2:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: call __fixhfdi@plt
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
@@ -371,9 +444,18 @@
; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call truncf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI23_0)
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI23_0)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fabs.s ft2, ft0
+; RV32IZFH-NEXT: flt.s a0, ft2, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB23_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB23_2:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: call __fixunshfdi@plt
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
@@ -668,28 +750,48 @@
; RV64IFD-NEXT: ret
; RV32IZFH-LABEL: test_floor_half:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call floorf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
-; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft0, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB40_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI40_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI40_0)(a0)
+; RV32IZFH-NEXT: fsub.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB40_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI40_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI40_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB40_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB40_4:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_floor_half:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: addi sp, sp, -16
-; RV64IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFH-NEXT: .cfi_offset ra, -8
-; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV64IZFH-NEXT: call floorf@plt
-; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
-; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64IZFH-NEXT: fcvt.s.l ft1, a0
+; RV64IZFH-NEXT: flt.s a0, ft0, ft1
+; RV64IZFH-NEXT: beqz a0, .LBB40_2
+; RV64IZFH-NEXT: # %bb.1:
+; RV64IZFH-NEXT: lui a0, %hi(.LCPI40_0)
+; RV64IZFH-NEXT: flw ft2, %lo(.LCPI40_0)(a0)
+; RV64IZFH-NEXT: fsub.s ft1, ft1, ft2
+; RV64IZFH-NEXT: .LBB40_2:
+; RV64IZFH-NEXT: lui a0, %hi(.LCPI40_1)
+; RV64IZFH-NEXT: flw ft2, %lo(.LCPI40_1)(a0)
+; RV64IZFH-NEXT: fabs.s ft3, ft0
+; RV64IZFH-NEXT: flt.s a0, ft3, ft2
+; RV64IZFH-NEXT: beqz a0, .LBB40_4
+; RV64IZFH-NEXT: # %bb.3:
+; RV64IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV64IZFH-NEXT: .LBB40_4:
+; RV64IZFH-NEXT: fcvt.h.s fa0, ft0
; RV64IZFH-NEXT: ret
%a = call half @llvm.floor.f16(half %x)
ret half %a
@@ -719,28 +821,48 @@
; RV64IFD-NEXT: ret
; RV32IZFH-LABEL: test_ceil_half:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call ceilf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
-; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: flt.s a0, ft1, ft0
+; RV32IZFH-NEXT: beqz a0, .LBB41_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI41_0)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI41_0)(a0)
+; RV32IZFH-NEXT: fadd.s ft1, ft1, ft2
+; RV32IZFH-NEXT: .LBB41_2:
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI41_1)
+; RV32IZFH-NEXT: flw ft2, %lo(.LCPI41_1)(a0)
+; RV32IZFH-NEXT: fabs.s ft3, ft0
+; RV32IZFH-NEXT: flt.s a0, ft3, ft2
+; RV32IZFH-NEXT: beqz a0, .LBB41_4
+; RV32IZFH-NEXT: # %bb.3:
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB41_4:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_ceil_half:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: addi sp, sp, -16
-; RV64IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFH-NEXT: .cfi_offset ra, -8
-; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV64IZFH-NEXT: call ceilf@plt
-; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
-; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64IZFH-NEXT: fcvt.s.l ft1, a0
+; RV64IZFH-NEXT: flt.s a0, ft1, ft0
+; RV64IZFH-NEXT: beqz a0, .LBB41_2
+; RV64IZFH-NEXT: # %bb.1:
+; RV64IZFH-NEXT: lui a0, %hi(.LCPI41_0)
+; RV64IZFH-NEXT: flw ft2, %lo(.LCPI41_0)(a0)
+; RV64IZFH-NEXT: fadd.s ft1, ft1, ft2
+; RV64IZFH-NEXT: .LBB41_2:
+; RV64IZFH-NEXT: lui a0, %hi(.LCPI41_1)
+; RV64IZFH-NEXT: flw ft2, %lo(.LCPI41_1)(a0)
+; RV64IZFH-NEXT: fabs.s ft3, ft0
+; RV64IZFH-NEXT: flt.s a0, ft3, ft2
+; RV64IZFH-NEXT: beqz a0, .LBB41_4
+; RV64IZFH-NEXT: # %bb.3:
+; RV64IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV64IZFH-NEXT: .LBB41_4:
+; RV64IZFH-NEXT: fcvt.h.s fa0, ft0
; RV64IZFH-NEXT: ret
%a = call half @llvm.ceil.f16(half %x)
ret half %a
@@ -770,28 +892,34 @@
; RV64IFD-NEXT: ret
; RV32IZFH-LABEL: test_trunc_half:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: .cfi_offset ra, -4
-; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT: call truncf@plt
-; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
-; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI42_0)
+; RV32IZFH-NEXT: flw ft1, %lo(.LCPI42_0)(a0)
+; RV32IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV32IZFH-NEXT: fabs.s ft2, ft0
+; RV32IZFH-NEXT: flt.s a0, ft2, ft1
+; RV32IZFH-NEXT: beqz a0, .LBB42_2
+; RV32IZFH-NEXT: # %bb.1:
+; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32IZFH-NEXT: fcvt.s.w ft1, a0
+; RV32IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV32IZFH-NEXT: .LBB42_2:
+; RV32IZFH-NEXT: fcvt.h.s fa0, ft0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_trunc_half:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: addi sp, sp, -16
-; RV64IZFH-NEXT: .cfi_def_cfa_offset 16
-; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFH-NEXT: .cfi_offset ra, -8
-; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV64IZFH-NEXT: call truncf@plt
-; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
-; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: lui a0, %hi(.LCPI42_0)
+; RV64IZFH-NEXT: flw ft1, %lo(.LCPI42_0)(a0)
+; RV64IZFH-NEXT: fcvt.s.h ft0, fa0
+; RV64IZFH-NEXT: fabs.s ft2, ft0
+; RV64IZFH-NEXT: flt.s a0, ft2, ft1
+; RV64IZFH-NEXT: beqz a0, .LBB42_2
+; RV64IZFH-NEXT: # %bb.1:
+; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64IZFH-NEXT: fcvt.s.l ft1, a0
+; RV64IZFH-NEXT: fsgnj.s ft0, ft1, ft0
+; RV64IZFH-NEXT: .LBB42_2:
+; RV64IZFH-NEXT: fcvt.h.s fa0, ft0
; RV64IZFH-NEXT: ret
%a = call half @llvm.trunc.f16(half %x)
ret half %a