diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -105,6 +105,14 @@ // is passed as a TargetConstant operand using the RISCVFPRndMode enum. FCVT_W_RV64, FCVT_WU_RV64, + + // Rounds an FP value to its corresponding integer in the same FP format. + // First operand is the value to round, the second operand is the largest + // integer that can be represented exactly in the FP format. This will be + // expanded into multiple instructions and basic blocks with a custom + // inserter. + FROUND, + // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target // (returns (Lo, Hi)). It takes a chain operand. READ_CYCLE_WIDE, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -323,6 +323,12 @@ if (Subtarget.hasStdExtZfh()) { setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); + setOperationAction(ISD::FCEIL, MVT::f16, Custom); + setOperationAction(ISD::FFLOOR, MVT::f16, Custom); + setOperationAction(ISD::FTRUNC, MVT::f16, Custom); + setOperationAction(ISD::FRINT, MVT::f16, Custom); + setOperationAction(ISD::FROUND, MVT::f16, Custom); + setOperationAction(ISD::FROUNDEVEN, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setCondCodeAction(FPCCToExpand, MVT::f16, Expand); @@ -330,11 +336,9 @@ setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f16, Expand); - setOperationAction({ISD::FREM, ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, - ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC, - ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN, - ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FLOG, - ISD::FLOG2, ISD::FLOG10}, + setOperationAction({ISD::FREM, ISD::FNEARBYINT, ISD::FPOW, ISD::FPOWI, + ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, + ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10}, MVT::f16, Promote); // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have @@ -352,6 +356,12 @@ if (Subtarget.hasStdExtF()) { setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Custom); + setOperationAction(ISD::FFLOOR, MVT::f32, Custom); + setOperationAction(ISD::FTRUNC, MVT::f32, Custom); + setOperationAction(ISD::FRINT, MVT::f32, Custom); + setOperationAction(ISD::FROUND, MVT::f32, Custom); + setOperationAction(ISD::FROUNDEVEN, MVT::f32, Custom); setCondCodeAction(FPCCToExpand, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Custom); @@ -366,6 +376,14 @@ if (Subtarget.hasStdExtD()) { setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); + if (Subtarget.is64Bit()) { + setOperationAction(ISD::FCEIL, MVT::f64, Custom); + setOperationAction(ISD::FFLOOR, MVT::f64, Custom); + setOperationAction(ISD::FTRUNC, MVT::f64, Custom); + setOperationAction(ISD::FRINT, MVT::f64, Custom); + setOperationAction(ISD::FROUND, MVT::f64, Custom); + setOperationAction(ISD::FROUNDEVEN, MVT::f64, Custom); + } setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); setCondCodeAction(FPCCToExpand, MVT::f64, Expand); @@ -1978,6 +1996,8 @@ case ISD::FROUND: case ISD::VP_FROUND: return RISCVFPRndMode::RMM; + case ISD::FRINT: + 
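// FRINT rounds using whatever rounding mode is currently held in the FRM register, so
+ // map it to the dynamic rounding mode.
+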
return RISCVFPRndMode::DYN; } return RISCVFPRndMode::Invalid; @@ -2085,6 +2105,34 @@ return convertFromScalableVector(VT, Truncated, DAG, Subtarget); } +static SDValue +lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + if (VT.isVector()) + return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + + if (DAG.shouldOptForSize()) + return SDValue(); + + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + + // Create an integer the size of the mantissa with the MSB set. This and all + // values larger than it don't have any fractional bits so don't need to be + // converted. + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + unsigned Precision = APFloat::semanticsPrecision(FltSem); + APFloat MaxVal = APFloat(FltSem); + MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), + /*IsSigned*/ false, APFloat::rmNearestTiesToEven); + SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); + + RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); + return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode, + DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); +} + struct VIDSequence { int64_t StepNumerator; unsigned StepDenominator; @@ -3711,9 +3759,10 @@ case ISD::FTRUNC: case ISD::FCEIL: case ISD::FFLOOR: + case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: - return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_SMAX: @@ -10720,6 +10769,113 @@ return BB; } +static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, + const RISCVSubtarget &Subtarget) { + unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; + const TargetRegisterClass *RC; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case RISCV::PseudoFROUND_H: + CmpOpc = RISCV::FLT_H; + F2IOpc = RISCV::FCVT_W_H; + I2FOpc = RISCV::FCVT_H_W; + FSGNJOpc = RISCV::FSGNJ_H; + FSGNJXOpc = RISCV::FSGNJX_H; + RC = &RISCV::FPR16RegClass; + break; + case RISCV::PseudoFROUND_S: + CmpOpc = RISCV::FLT_S; + F2IOpc = RISCV::FCVT_W_S; + I2FOpc = RISCV::FCVT_S_W; + FSGNJOpc = RISCV::FSGNJ_S; + FSGNJXOpc = RISCV::FSGNJX_S; + RC = &RISCV::FPR32RegClass; + break; + case RISCV::PseudoFROUND_D: + assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); + CmpOpc = RISCV::FLT_D; + F2IOpc = RISCV::FCVT_L_D; + I2FOpc = RISCV::FCVT_D_L; + FSGNJOpc = RISCV::FSGNJ_D; + FSGNJXOpc = RISCV::FSGNJX_D; + RC = &RISCV::FPR64RegClass; + break; + } + + const BasicBlock *BB = MBB->getBasicBlock(); + DebugLoc DL = MI.getDebugLoc(); + MachineFunction::iterator I = ++MBB->getIterator(); + + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); + MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); + + F->insert(I, CvtMBB); + F->insert(I, DoneMBB); + // Move all instructions after the sequence to DoneMBB. + DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI), + MBB->end()); + // Update machine-CFG edges by transferring all successors of the current + // block to the new block which will contain the Phi nodes for the selects. + DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); + // Set the successors for MBB. 
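+ // The resulting control flow is:
+ //   MBB:     fabs, flt and a branch straight to DoneMBB when no conversion is
+ //            needed (|src| >= the max operand, or src is NaN)
+ //   CvtMBB:  convert to integer and back, then restore the original sign
+ //   DoneMBB: a PHI selecting between the original and the converted value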
+ MBB->addSuccessor(CvtMBB); + MBB->addSuccessor(DoneMBB); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register MaxReg = MI.getOperand(2).getReg(); + int64_t FRM = MI.getOperand(3).getImm(); + + const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + Register FabsReg = MRI.createVirtualRegister(RC); + BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); + + // Compare the FP value to the max value. + Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + auto MIB = + BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); + if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) + MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); + + // Insert branch. + BuildMI(MBB, DL, TII.get(RISCV::BEQ)) + .addReg(CmpReg) + .addReg(RISCV::X0) + .addMBB(DoneMBB); + + CvtMBB->addSuccessor(DoneMBB); + + // Convert to integer. + Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); + if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) + MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); + + // Convert back to FP. + Register I2FReg = MRI.createVirtualRegister(RC); + MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); + if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) + MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); + + // Restore the sign bit. + Register CvtReg = MRI.createVirtualRegister(RC); + BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); + + // Merge the results. + BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) + .addReg(SrcReg) + .addMBB(MBB) + .addReg(CvtReg) + .addMBB(CvtMBB); + + MI.eraseFromParent(); + return DoneMBB; +} + MachineBasicBlock * RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -10763,6 +10919,10 @@ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK); case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK: return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK); + case RISCV::PseudoFROUND_H: + case RISCV::PseudoFROUND_S: + case RISCV::PseudoFROUND_D: + return emitFROUND(MI, BB, Subtarget); } } @@ -12238,6 +12398,7 @@ NODE_NAME_CASE(FCVT_WU_RV64) NODE_NAME_CASE(STRICT_FCVT_W_RV64) NODE_NAME_CASE(STRICT_FCVT_WU_RV64) + NODE_NAME_CASE(FROUND) NODE_NAME_CASE(READ_CYCLE_WIDE) NODE_NAME_CASE(BREV8) NODE_NAME_CASE(ORC_B) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -336,6 +336,8 @@ defm Select_FPR64 : SelectCC_GPR_rrirr; +def PseudoFROUND_D : PseudoFROUND; + /// Loads defm : LdPat; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -26,6 +26,13 @@ : SDTypeProfile<1, 2, [SDTCisVT<0, XLenVT>, SDTCisFP<1>, SDTCisVT<2, XLenVT>]>; +def SDT_RISCVFROUND + : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, XLenVT>]>; + +def riscv_fround + : SDNode<"RISCVISD::FROUND", SDT_RISCVFROUND>; + def riscv_fmv_w_x_rv64 : SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>; def riscv_fmv_x_anyextw_rv64 @@ -289,6 +296,16 @@ def Ext.Suffix : FPCmp_rr; } +class PseudoFROUND + : Pseudo<(outs 
Ty:$rd), (ins Ty:$rs1, Ty:$rs2, ixlenimm:$rm), + [(set Ty:$rd, (riscv_fround Ty:$rs1, Ty:$rs2, timm:$rm))]> { + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; + let usesCustomInserter = 1; + let mayRaiseFPException = 1; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -566,6 +583,8 @@ defm Select_FPR32 : SelectCC_GPR_rrirr; +def PseudoFROUND_S : PseudoFROUND; + /// Loads defm : LdPat; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -337,6 +337,8 @@ def : PatSetCC; defm Select_FPR16 : SelectCC_GPR_rrirr; + +def PseudoFROUND_H : PseudoFROUND; } // Predicates = [HasStdExtZfh] let Predicates = [HasStdExtZfhOrZfhmin] in { diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -486,6 +486,18 @@ TTI::TargetCostKind CostKind) { auto *RetTy = ICA.getReturnType(); switch (ICA.getID()) { + case Intrinsic::ceil: + case Intrinsic::floor: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::round: + case Intrinsic::roundeven: { + // These all use the same code. + auto LT = getTypeLegalizationCost(RetTy); + if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second)) + return LT.first * 8; + break; + } case Intrinsic::umin: case Intrinsic::umax: case Intrinsic::smin: diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -3,7 +3,7 @@ define void @floor() { ; CHECK-LABEL: 'floor' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.floor.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.floor.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) @@ -13,7 +13,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.floor.nxv4f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.floor.nxv8f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.floor.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.floor.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.floor.f64(double undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> 
@llvm.floor.v8f64(<8 x double> undef) @@ -48,7 +48,7 @@ define void @ceil() { ; CHECK-LABEL: 'ceil' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.ceil.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.ceil.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) @@ -58,7 +58,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.ceil.nxv4f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.ceil.nxv8f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.ceil.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.ceil.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.ceil.f64(double undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) @@ -93,7 +93,7 @@ define void @trunc() { ; CHECK-LABEL: 'trunc' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.trunc.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.trunc.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.trunc.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) @@ -103,7 +103,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.trunc.nxv4f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.trunc.nxv8f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call @llvm.trunc.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.trunc.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.trunc.f64(double undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) @@ -138,21 +138,21 @@ define void @rint() { ; CHECK-LABEL: 'rint' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call 
float @llvm.rint.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %2 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %3 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %4 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %5 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.rint.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %3 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %4 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %5 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.rint.nxv1f32( undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.rint.nxv2f32( undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.rint.nxv4f32( undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.rint.nxv8f32( undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.rint.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.rint.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %13 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %14 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.rint.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %13 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %14 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %15 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.rint.nxv1f64( undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call @llvm.rint.nxv2f64( undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call @llvm.rint.nxv4f64( undef) @@ -228,7 +228,7 @@ define void @round() { ; CHECK-LABEL: 'round' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.round.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 
= call float @llvm.round.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.round.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.round.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.round.v8f32(<8 x float> undef) @@ -238,7 +238,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.round.nxv4f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.round.nxv8f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.round.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.round.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.round.f64(double undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.round.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.round.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.round.v8f64(<8 x double> undef) @@ -273,7 +273,7 @@ define void @roundeven() { ; CHECK-LABEL: 'roundeven' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.roundeven.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.roundeven.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) @@ -283,7 +283,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.roundeven.nxv4f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.roundeven.nxv8f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.roundeven.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.roundeven.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.roundeven.f64(double undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll @@ -596,9 +596,23 @@ declare double @llvm.floor.f64(double) define double @floor_f64(double %a) nounwind { -; CHECKIFD-LABEL: floor_f64: -; CHECKIFD: # %bb.0: -; 
CHECKIFD-NEXT: tail floor@plt +; RV32IFD-LABEL: floor_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail floor@plt +; +; RV64IFD-LABEL: floor_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI17_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB17_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rdn +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rdn +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB17_2: +; RV64IFD-NEXT: ret ; ; RV32I-LABEL: floor_f64: ; RV32I: # %bb.0: @@ -624,9 +638,23 @@ declare double @llvm.ceil.f64(double) define double @ceil_f64(double %a) nounwind { -; CHECKIFD-LABEL: ceil_f64: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail ceil@plt +; RV32IFD-LABEL: ceil_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail ceil@plt +; +; RV64IFD-LABEL: ceil_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI18_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI18_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB18_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rup +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rup +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB18_2: +; RV64IFD-NEXT: ret ; ; RV32I-LABEL: ceil_f64: ; RV32I: # %bb.0: @@ -652,9 +680,23 @@ declare double @llvm.trunc.f64(double) define double @trunc_f64(double %a) nounwind { -; CHECKIFD-LABEL: trunc_f64: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail trunc@plt +; RV32IFD-LABEL: trunc_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail trunc@plt +; +; RV64IFD-LABEL: trunc_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI19_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB19_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rtz +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB19_2: +; RV64IFD-NEXT: ret ; ; RV32I-LABEL: trunc_f64: ; RV32I: # %bb.0: @@ -680,9 +722,23 @@ declare double @llvm.rint.f64(double) define double @rint_f64(double %a) nounwind { -; CHECKIFD-LABEL: rint_f64: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail rint@plt +; RV32IFD-LABEL: rint_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail rint@plt +; +; RV64IFD-LABEL: rint_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI20_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI20_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB20_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0 +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB20_2: +; RV64IFD-NEXT: ret ; ; RV32I-LABEL: rint_f64: ; RV32I: # %bb.0: @@ -736,9 +792,23 @@ declare double @llvm.round.f64(double) define double @round_f64(double %a) nounwind { -; CHECKIFD-LABEL: round_f64: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail round@plt +; RV32IFD-LABEL: round_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail round@plt +; +; RV64IFD-LABEL: round_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI22_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI22_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB22_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rmm +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rmm +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB22_2: +; RV64IFD-NEXT: ret ; ; RV32I-LABEL: 
round_f64: ; RV32I: # %bb.0: @@ -764,9 +834,23 @@ declare double @llvm.roundeven.f64(double) define double @roundeven_f64(double %a) nounwind { -; CHECKIFD-LABEL: roundeven_f64: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail roundeven@plt +; RV32IFD-LABEL: roundeven_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail roundeven@plt +; +; RV64IFD-LABEL: roundeven_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI23_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI23_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB23_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rne +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rne +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB23_2: +; RV64IFD-NEXT: ret ; ; RV32I-LABEL: roundeven_f64: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/double-round-conv.ll b/llvm/test/CodeGen/RISCV/double-round-conv.ll --- a/llvm/test/CodeGen/RISCV/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv.ll @@ -625,41 +625,111 @@ } define double @test_floor_double(double %x) { -; CHECKIFD-LABEL: test_floor_double: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail floor@plt +; RV32IFD-LABEL: test_floor_double: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail floor@plt +; +; RV64IFD-LABEL: test_floor_double: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI40_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI40_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB40_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rdn +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rdn +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB40_2: +; RV64IFD-NEXT: ret %a = call double @llvm.floor.f64(double %x) ret double %a } define double @test_ceil_double(double %x) { -; CHECKIFD-LABEL: test_ceil_double: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail ceil@plt +; RV32IFD-LABEL: test_ceil_double: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail ceil@plt +; +; RV64IFD-LABEL: test_ceil_double: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI41_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI41_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB41_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rup +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rup +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB41_2: +; RV64IFD-NEXT: ret %a = call double @llvm.ceil.f64(double %x) ret double %a } define double @test_trunc_double(double %x) { -; CHECKIFD-LABEL: test_trunc_double: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail trunc@plt +; RV32IFD-LABEL: test_trunc_double: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail trunc@plt +; +; RV64IFD-LABEL: test_trunc_double: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI42_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI42_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB42_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rtz +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB42_2: +; RV64IFD-NEXT: ret %a = call double @llvm.trunc.f64(double %x) ret double %a } define double @test_round_double(double %x) { -; CHECKIFD-LABEL: test_round_double: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail round@plt +; RV32IFD-LABEL: test_round_double: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail round@plt +; +; RV64IFD-LABEL: test_round_double: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, 
%hi(.LCPI43_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI43_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB43_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rmm +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rmm +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB43_2: +; RV64IFD-NEXT: ret %a = call double @llvm.round.f64(double %x) ret double %a } define double @test_roundeven_double(double %x) { -; CHECKIFD-LABEL: test_roundeven_double: -; CHECKIFD: # %bb.0: -; CHECKIFD-NEXT: tail roundeven@plt +; RV32IFD-LABEL: test_roundeven_double: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail roundeven@plt +; +; RV64IFD-LABEL: test_roundeven_double: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lui a0, %hi(.LCPI44_0) +; RV64IFD-NEXT: fld ft0, %lo(.LCPI44_0)(a0) +; RV64IFD-NEXT: fabs.d ft1, fa0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: beqz a0, .LBB44_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rne +; RV64IFD-NEXT: fcvt.d.l ft0, a0, rne +; RV64IFD-NEXT: fsgnj.d fa0, ft0, fa0 +; RV64IFD-NEXT: .LBB44_2: +; RV64IFD-NEXT: ret %a = call double @llvm.roundeven.f64(double %x) ret double %a } diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll @@ -640,11 +640,31 @@ define float @floor_f32(float %a) nounwind { ; RV32IF-LABEL: floor_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail floorf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI17_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB17_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rdn +; RV32IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB17_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: floor_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail floorf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI17_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI17_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB17_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rdn +; RV64IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB17_2: +; RV64IF-NEXT: ret ; ; RV32I-LABEL: floor_f32: ; RV32I: # %bb.0: @@ -672,11 +692,31 @@ define float @ceil_f32(float %a) nounwind { ; RV32IF-LABEL: ceil_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail ceilf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI18_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI18_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB18_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rup +; RV32IF-NEXT: fcvt.s.w ft0, a0, rup +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB18_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: ceil_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail ceilf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI18_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI18_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB18_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rup +; RV64IF-NEXT: fcvt.s.w ft0, a0, rup +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB18_2: +; RV64IF-NEXT: ret ; ; RV32I-LABEL: ceil_f32: ; RV32I: # %bb.0: @@ -704,11 +744,31 @@ define float @trunc_f32(float %a) nounwind { ; RV32IF-LABEL: trunc_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail truncf@plt +; RV32IF-NEXT: lui 
a0, %hi(.LCPI19_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB19_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB19_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: trunc_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail truncf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI19_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB19_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB19_2: +; RV64IF-NEXT: ret ; ; RV32I-LABEL: trunc_f32: ; RV32I: # %bb.0: @@ -736,11 +796,31 @@ define float @rint_f32(float %a) nounwind { ; RV32IF-LABEL: rint_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail rintf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI20_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI20_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB20_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0 +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB20_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: rint_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail rintf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI20_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI20_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB20_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0 +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB20_2: +; RV64IF-NEXT: ret ; ; RV32I-LABEL: rint_f32: ; RV32I: # %bb.0: @@ -800,11 +880,31 @@ define float @round_f32(float %a) nounwind { ; RV32IF-LABEL: round_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail roundf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI22_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI22_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB22_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV32IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB22_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: round_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail roundf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI22_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI22_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB22_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV64IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB22_2: +; RV64IF-NEXT: ret ; ; RV32I-LABEL: round_f32: ; RV32I: # %bb.0: @@ -832,11 +932,31 @@ define float @roundeven_f32(float %a) nounwind { ; RV32IF-LABEL: roundeven_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail roundevenf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI23_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB23_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rne +; RV32IF-NEXT: fcvt.s.w ft0, a0, rne +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB23_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: roundeven_f32: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail roundevenf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI23_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI23_0)(a0) +; RV64IF-NEXT: 
fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB23_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rne +; RV64IF-NEXT: fcvt.s.w ft0, a0, rne +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB23_2: +; RV64IF-NEXT: ret ; ; RV32I-LABEL: roundeven_f32: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -25,31 +25,41 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call floorf@plt ; RV32IF-NEXT: lui a0, %hi(.LCPI1_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI1_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB1_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rdn +; RV32IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB1_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: bnez s0, .LBB1_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB1_4 +; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: .LBB1_2: -; RV32IF-NEXT: lui a2, %hi(.LCPI1_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI1_1)(a2) +; RV32IF-NEXT: .LBB1_4: +; RV32IF-NEXT: lui a2, %hi(.LCPI1_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI1_2)(a2) ; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: beqz a2, .LBB1_4 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a2, .LBB1_6 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: addi a1, a3, -1 -; RV32IF-NEXT: .LBB1_4: +; RV32IF-NEXT: .LBB1_6: ; RV32IF-NEXT: feq.s a3, fs0, fs0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -91,23 +101,34 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call floorf@plt +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a0) -; RV32IF-NEXT: flt.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB3_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rdn +; RV32IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB3_2: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s1, a0 +; RV32IF-NEXT: fle.s a0, ft0, fs0 +; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: or a0, s0, a0 -; RV32IF-NEXT: and a1, s1, a1 -; RV32IF-NEXT: or a1, s0, a1 +; RV32IF-NEXT: lui a2, %hi(.LCPI3_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI3_1)(a2) +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: flt.s a2, ft0, fs0 +; RV32IF-NEXT: neg a2, a2 +; 
RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -145,31 +166,41 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call ceilf@plt ; RV32IF-NEXT: lui a0, %hi(.LCPI5_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI5_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB5_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rup +; RV32IF-NEXT: fcvt.s.w ft0, a0, rup +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB5_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI5_1)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: bnez s0, .LBB5_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB5_4 +; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: .LBB5_2: -; RV32IF-NEXT: lui a2, %hi(.LCPI5_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI5_1)(a2) +; RV32IF-NEXT: .LBB5_4: +; RV32IF-NEXT: lui a2, %hi(.LCPI5_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI5_2)(a2) ; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: beqz a2, .LBB5_4 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a2, .LBB5_6 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: addi a1, a3, -1 -; RV32IF-NEXT: .LBB5_4: +; RV32IF-NEXT: .LBB5_6: ; RV32IF-NEXT: feq.s a3, fs0, fs0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -211,23 +242,34 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: lui a0, %hi(.LCPI7_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; RV32IF-NEXT: flt.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB7_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rup +; RV32IF-NEXT: fcvt.s.w ft0, a0, rup +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB7_2: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s1, a0 +; RV32IF-NEXT: fle.s a0, ft0, fs0 +; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: or a0, s0, a0 -; RV32IF-NEXT: and a1, s1, a1 -; RV32IF-NEXT: or a1, s0, a1 +; RV32IF-NEXT: lui a2, %hi(.LCPI7_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI7_1)(a2) +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: flt.s a2, ft0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; 
RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -265,31 +307,41 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call truncf@plt ; RV32IF-NEXT: lui a0, %hi(.LCPI9_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI9_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB9_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB9_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: bnez s0, .LBB9_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB9_4 +; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: .LBB9_2: -; RV32IF-NEXT: lui a2, %hi(.LCPI9_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI9_1)(a2) +; RV32IF-NEXT: .LBB9_4: +; RV32IF-NEXT: lui a2, %hi(.LCPI9_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI9_2)(a2) ; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: beqz a2, .LBB9_4 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a2, .LBB9_6 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: addi a1, a3, -1 -; RV32IF-NEXT: .LBB9_4: +; RV32IF-NEXT: .LBB9_6: ; RV32IF-NEXT: feq.s a3, fs0, fs0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -331,23 +383,34 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) -; RV32IF-NEXT: flt.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB11_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB11_2: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s1, a0 +; RV32IF-NEXT: fle.s a0, ft0, fs0 +; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: or a0, s0, a0 -; RV32IF-NEXT: and a1, s1, a1 -; RV32IF-NEXT: or a1, s0, a1 +; RV32IF-NEXT: lui a2, %hi(.LCPI11_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_1)(a2) +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: flt.s a2, ft0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -385,31 +448,41 @@ ; 
RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call roundf@plt ; RV32IF-NEXT: lui a0, %hi(.LCPI13_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI13_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB13_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rmm +; RV32IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB13_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI13_1)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: bnez s0, .LBB13_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB13_4 +; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: .LBB13_2: -; RV32IF-NEXT: lui a2, %hi(.LCPI13_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI13_1)(a2) +; RV32IF-NEXT: .LBB13_4: +; RV32IF-NEXT: lui a2, %hi(.LCPI13_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI13_2)(a2) ; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: beqz a2, .LBB13_4 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a2, .LBB13_6 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: addi a1, a3, -1 -; RV32IF-NEXT: .LBB13_4: +; RV32IF-NEXT: .LBB13_6: ; RV32IF-NEXT: feq.s a3, fs0, fs0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -451,23 +524,34 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call roundf@plt +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: lui a0, %hi(.LCPI15_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI15_0)(a0) -; RV32IF-NEXT: flt.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB15_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rmm +; RV32IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB15_2: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s1, a0 +; RV32IF-NEXT: fle.s a0, ft0, fs0 +; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: or a0, s0, a0 -; RV32IF-NEXT: and a1, s1, a1 -; RV32IF-NEXT: or a1, s0, a1 +; RV32IF-NEXT: lui a2, %hi(.LCPI15_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI15_1)(a2) +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: flt.s a2, ft0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -505,31 +589,41 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call 
roundevenf@plt ; RV32IF-NEXT: lui a0, %hi(.LCPI17_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI17_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 -; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB17_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rne +; RV32IF-NEXT: fcvt.s.w ft0, a0, rne +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB17_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI17_1)(a0) +; RV32IF-NEXT: fle.s s0, ft0, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: bnez s0, .LBB17_2 -; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: bnez s0, .LBB17_4 +; RV32IF-NEXT: # %bb.3: ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: .LBB17_2: -; RV32IF-NEXT: lui a2, %hi(.LCPI17_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI17_1)(a2) +; RV32IF-NEXT: .LBB17_4: +; RV32IF-NEXT: lui a2, %hi(.LCPI17_2) +; RV32IF-NEXT: flw ft0, %lo(.LCPI17_2)(a2) ; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: beqz a2, .LBB17_4 -; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz a2, .LBB17_6 +; RV32IF-NEXT: # %bb.5: ; RV32IF-NEXT: addi a1, a3, -1 -; RV32IF-NEXT: .LBB17_4: +; RV32IF-NEXT: .LBB17_6: ; RV32IF-NEXT: feq.s a3, fs0, fs0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -571,23 +665,34 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: call roundevenf@plt +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: lui a0, %hi(.LCPI19_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) -; RV32IF-NEXT: flt.s a0, ft0, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB19_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0, rne +; RV32IF-NEXT: fcvt.s.w ft0, a0, rne +; RV32IF-NEXT: fsgnj.s fs0, ft0, fs0 +; RV32IF-NEXT: .LBB19_2: ; RV32IF-NEXT: fmv.w.x ft0, zero -; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: neg s1, a0 +; RV32IF-NEXT: fle.s a0, ft0, fs0 +; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: or a0, s0, a0 -; RV32IF-NEXT: and a1, s1, a1 -; RV32IF-NEXT: or a1, s0, a1 +; RV32IF-NEXT: lui a2, %hi(.LCPI19_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI19_1)(a2) +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: flt.s a2, ft0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-round-conv.ll b/llvm/test/CodeGen/RISCV/float-round-conv.ll --- a/llvm/test/CodeGen/RISCV/float-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv.ll @@ -56,7 +56,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: 
call floorf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB3_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rdn +; RV32IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB3_2: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -123,7 +132,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call floorf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB7_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rdn +; RV32IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB7_2: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -190,7 +208,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB11_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rup +; RV32IF-NEXT: fcvt.s.w ft0, a0, rup +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB11_2: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -257,7 +284,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI15_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB15_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rup +; RV32IF-NEXT: fcvt.s.w ft0, a0, rup +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB15_2: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -324,7 +360,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB19_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB19_2: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -391,7 +436,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI23_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB23_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; 
RV32IF-NEXT: .LBB23_2: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -458,7 +512,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call roundf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI27_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI27_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB27_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV32IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB27_2: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -525,7 +588,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call roundf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI31_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI31_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB31_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV32IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB31_2: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -592,7 +664,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call roundevenf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI35_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI35_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB35_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rne +; RV32IF-NEXT: fcvt.s.w ft0, a0, rne +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB35_2: ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -659,7 +740,16 @@ ; RV32IF-NEXT: .cfi_def_cfa_offset 16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 -; RV32IF-NEXT: call roundevenf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI39_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI39_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB39_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rne +; RV32IF-NEXT: fcvt.s.w ft0, a0, rne +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB39_2: ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -698,11 +788,31 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_floor_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail floorf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI40_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI40_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB40_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rdn +; RV32IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB40_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_floor_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail floorf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI40_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI40_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB40_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rdn 
+; RV64IF-NEXT: fcvt.s.w ft0, a0, rdn +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB40_2: +; RV64IF-NEXT: ret %a = call float @llvm.floor.f32(float %x) ret float %a } @@ -731,11 +841,31 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_ceil_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail ceilf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI41_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI41_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB41_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rup +; RV32IF-NEXT: fcvt.s.w ft0, a0, rup +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB41_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_ceil_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail ceilf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI41_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI41_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB41_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rup +; RV64IF-NEXT: fcvt.s.w ft0, a0, rup +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB41_2: +; RV64IF-NEXT: ret %a = call float @llvm.ceil.f32(float %x) ret float %a } @@ -764,11 +894,31 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_trunc_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail truncf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI42_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI42_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB42_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB42_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_trunc_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail truncf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI42_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI42_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB42_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV64IF-NEXT: fcvt.s.w ft0, a0, rtz +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB42_2: +; RV64IF-NEXT: ret %a = call float @llvm.trunc.f32(float %x) ret float %a } @@ -797,11 +947,31 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_round_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail roundf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI43_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI43_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB43_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV32IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB43_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_round_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail roundf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI43_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI43_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB43_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV64IF-NEXT: fcvt.s.w ft0, a0, rmm +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB43_2: +; RV64IF-NEXT: ret %a = call float @llvm.round.f32(float %x) ret float %a } @@ -830,11 +1000,31 @@ ; RV64IFD-NEXT: ret ; RV32IF-LABEL: test_roundeven_float: ; RV32IF: # %bb.0: -; RV32IF-NEXT: tail roundevenf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI44_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI44_0)(a0) +; RV32IF-NEXT: fabs.s ft1, fa0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: beqz a0, .LBB44_2 +; RV32IF-NEXT: # %bb.1: +; 
RV32IF-NEXT: fcvt.w.s a0, fa0, rne +; RV32IF-NEXT: fcvt.s.w ft0, a0, rne +; RV32IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV32IF-NEXT: .LBB44_2: +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: test_roundeven_float: ; RV64IF: # %bb.0: -; RV64IF-NEXT: tail roundevenf@plt +; RV64IF-NEXT: lui a0, %hi(.LCPI44_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI44_0)(a0) +; RV64IF-NEXT: fabs.s ft1, fa0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: beqz a0, .LBB44_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rne +; RV64IF-NEXT: fcvt.s.w ft0, a0, rne +; RV64IF-NEXT: fsgnj.s fa0, ft0, fa0 +; RV64IF-NEXT: .LBB44_2: +; RV64IF-NEXT: ret %a = call float @llvm.roundeven.f32(float %x) ret float %a } diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -1097,27 +1097,19 @@ declare half @llvm.floor.f16(half) define half @floor_f16(half %a) nounwind { -; RV32IZFH-LABEL: floor_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: floor_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call floorf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: floor_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI17_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB17_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rdn +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rdn +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB17_2: +; CHECKIZFH-NEXT: ret ; ; RV32I-LABEL: floor_f16: ; RV32I: # %bb.0: @@ -1151,27 +1143,19 @@ declare half @llvm.ceil.f16(half) define half @ceil_f16(half %a) nounwind { -; RV32IZFH-LABEL: ceil_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call ceilf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: ceil_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call ceilf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: ceil_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI18_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB18_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rup +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rup +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB18_2: +; CHECKIZFH-NEXT: ret ; ; RV32I-LABEL: ceil_f16: ; RV32I: # %bb.0: @@ -1205,27 +1189,19 @@ declare half 
@llvm.trunc.f16(half) define half @trunc_f16(half %a) nounwind { -; RV32IZFH-LABEL: trunc_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call truncf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: trunc_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call truncf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: trunc_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI19_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB19_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rtz +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB19_2: +; CHECKIZFH-NEXT: ret ; ; RV32I-LABEL: trunc_f16: ; RV32I: # %bb.0: @@ -1259,27 +1235,19 @@ declare half @llvm.rint.f16(half) define half @rint_f16(half %a) nounwind { -; RV32IZFH-LABEL: rint_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call rintf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: rint_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call rintf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: rint_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI20_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI20_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB20_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0 +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0 +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB20_2: +; CHECKIZFH-NEXT: ret ; ; RV32I-LABEL: rint_f16: ; RV32I: # %bb.0: @@ -1367,27 +1335,19 @@ declare half @llvm.round.f16(half) define half @round_f16(half %a) nounwind { -; RV32IZFH-LABEL: round_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: round_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call roundf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: round_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI22_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI22_0)(a0) +; 
CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB22_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rmm +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rmm +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB22_2: +; CHECKIZFH-NEXT: ret ; ; RV32I-LABEL: round_f16: ; RV32I: # %bb.0: @@ -1421,27 +1381,19 @@ declare half @llvm.roundeven.f16(half) define half @roundeven_f16(half %a) nounwind { -; RV32IZFH-LABEL: roundeven_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundevenf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: roundeven_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call roundevenf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: roundeven_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI23_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI23_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB23_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rne +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rne +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB23_2: +; CHECKIZFH-NEXT: ret ; ; RV32I-LABEL: roundeven_f16: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -25,34 +25,41 @@ ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt ; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB1_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rdn +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rdn +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB1_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fle.s s0, ft0, fs0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi@plt ; RV32IZFH-NEXT: lui a3, 524288 -; RV32IZFH-NEXT: bnez s0, .LBB1_2 -; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: bnez s0, .LBB1_4 +; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: lui a1, 524288 -; RV32IZFH-NEXT: .LBB1_2: -; RV32IZFH-NEXT: lui a2, %hi(.LCPI1_1) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_1)(a2) +; RV32IZFH-NEXT: .LBB1_4: +; RV32IZFH-NEXT: lui a2, %hi(.LCPI1_2) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_2)(a2) ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: beqz a2, .LBB1_4 -; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz a2, .LBB1_6 +; RV32IZFH-NEXT: # %bb.5: ; RV32IZFH-NEXT: addi a1, a3, -1 -; RV32IZFH-NEXT: .LBB1_4: +; RV32IZFH-NEXT: .LBB1_6: ; RV32IZFH-NEXT: 
feq.s a3, fs0, fs0 ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 +; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -94,26 +101,34 @@ ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 -; RV32IZFH-NEXT: flt.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB3_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rdn +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rdn +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB3_2: +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: fle.s a0, ft0, fs0 +; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: or a0, s0, a0 -; RV32IZFH-NEXT: and a1, s1, a1 -; RV32IZFH-NEXT: or a1, s0, a1 +; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_1)(a2) +; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: flt.s a2, ft0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -151,34 +166,41 @@ ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call ceilf@plt ; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB5_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rup +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rup +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB5_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fle.s s0, ft0, fs0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi@plt ; RV32IZFH-NEXT: lui a3, 524288 -; RV32IZFH-NEXT: bnez s0, .LBB5_2 -; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: bnez s0, .LBB5_4 +; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: lui a1, 524288 -; RV32IZFH-NEXT: .LBB5_2: -; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_1) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_1)(a2) +; RV32IZFH-NEXT: .LBB5_4: +; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_2) +; RV32IZFH-NEXT: flw ft0, 
%lo(.LCPI5_2)(a2) ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: beqz a2, .LBB5_4 -; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz a2, .LBB5_6 +; RV32IZFH-NEXT: # %bb.5: ; RV32IZFH-NEXT: addi a1, a3, -1 -; RV32IZFH-NEXT: .LBB5_4: +; RV32IZFH-NEXT: .LBB5_6: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 +; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -220,26 +242,34 @@ ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call ceilf@plt +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 -; RV32IZFH-NEXT: flt.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI7_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB7_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rup +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rup +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB7_2: +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: fle.s a0, ft0, fs0 +; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: or a0, s0, a0 -; RV32IZFH-NEXT: and a1, s1, a1 -; RV32IZFH-NEXT: or a1, s0, a1 +; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI7_1)(a2) +; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: flt.s a2, ft0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -277,34 +307,41 @@ ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call truncf@plt ; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI9_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB9_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rtz +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB9_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fle.s s0, ft0, fs0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi@plt ; RV32IZFH-NEXT: lui a3, 524288 -; RV32IZFH-NEXT: bnez s0, .LBB9_2 -; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: bnez s0, 
.LBB9_4 +; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: lui a1, 524288 -; RV32IZFH-NEXT: .LBB9_2: -; RV32IZFH-NEXT: lui a2, %hi(.LCPI9_1) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_1)(a2) +; RV32IZFH-NEXT: .LBB9_4: +; RV32IZFH-NEXT: lui a2, %hi(.LCPI9_2) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_2)(a2) ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: beqz a2, .LBB9_4 -; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz a2, .LBB9_6 +; RV32IZFH-NEXT: # %bb.5: ; RV32IZFH-NEXT: addi a1, a3, -1 -; RV32IZFH-NEXT: .LBB9_4: +; RV32IZFH-NEXT: .LBB9_6: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 +; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -346,26 +383,34 @@ ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call truncf@plt +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 -; RV32IZFH-NEXT: flt.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI11_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB11_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rtz +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB11_2: +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: fle.s a0, ft0, fs0 +; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: or a0, s0, a0 -; RV32IZFH-NEXT: and a1, s1, a1 -; RV32IZFH-NEXT: or a1, s0, a1 +; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_1)(a2) +; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: flt.s a2, ft0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -403,34 +448,41 @@ ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundf@plt ; RV32IZFH-NEXT: lui a0, %hi(.LCPI13_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI13_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI13_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB13_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rmm +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB13_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IZFH-NEXT: flw 
ft0, %lo(.LCPI13_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fle.s s0, ft0, fs0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi@plt ; RV32IZFH-NEXT: lui a3, 524288 -; RV32IZFH-NEXT: bnez s0, .LBB13_2 -; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: bnez s0, .LBB13_4 +; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: lui a1, 524288 -; RV32IZFH-NEXT: .LBB13_2: -; RV32IZFH-NEXT: lui a2, %hi(.LCPI13_1) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI13_1)(a2) +; RV32IZFH-NEXT: .LBB13_4: +; RV32IZFH-NEXT: lui a2, %hi(.LCPI13_2) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI13_2)(a2) ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: beqz a2, .LBB13_4 -; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz a2, .LBB13_6 +; RV32IZFH-NEXT: # %bb.5: ; RV32IZFH-NEXT: addi a1, a3, -1 -; RV32IZFH-NEXT: .LBB13_4: +; RV32IZFH-NEXT: .LBB13_6: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 +; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -472,26 +524,34 @@ ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundf@plt +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI15_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 -; RV32IZFH-NEXT: flt.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB15_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rmm +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB15_2: +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: fle.s a0, ft0, fs0 +; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: or a0, s0, a0 -; RV32IZFH-NEXT: and a1, s1, a1 -; RV32IZFH-NEXT: or a1, s0, a1 +; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI15_1)(a2) +; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: flt.s a2, ft0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -529,34 +589,41 @@ ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundevenf@plt ; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI17_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; 
RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB17_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rne +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rne +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB17_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI17_1)(a0) +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fle.s s0, ft0, fs0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi@plt ; RV32IZFH-NEXT: lui a3, 524288 -; RV32IZFH-NEXT: bnez s0, .LBB17_2 -; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: bnez s0, .LBB17_4 +; RV32IZFH-NEXT: # %bb.3: ; RV32IZFH-NEXT: lui a1, 524288 -; RV32IZFH-NEXT: .LBB17_2: -; RV32IZFH-NEXT: lui a2, %hi(.LCPI17_1) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI17_1)(a2) +; RV32IZFH-NEXT: .LBB17_4: +; RV32IZFH-NEXT: lui a2, %hi(.LCPI17_2) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI17_2)(a2) ; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: beqz a2, .LBB17_4 -; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz a2, .LBB17_6 +; RV32IZFH-NEXT: # %bb.5: ; RV32IZFH-NEXT: addi a1, a3, -1 -; RV32IZFH-NEXT: .LBB17_4: +; RV32IZFH-NEXT: .LBB17_6: ; RV32IZFH-NEXT: feq.s a3, fs0, fs0 ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 +; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -598,26 +665,34 @@ ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundevenf@plt +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI19_0)(a0) -; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 -; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 -; RV32IZFH-NEXT: flt.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB19_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rne +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rne +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB19_2: +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fa0 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: fle.s a0, ft0, fs0 +; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: or a0, s0, a0 -; RV32IZFH-NEXT: and a1, s1, a1 -; RV32IZFH-NEXT: or a1, s0, a1 +; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI19_1)(a2) +; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: flt.s a2, ft0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: or a1, a2, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/half-round-conv.ll b/llvm/test/CodeGen/RISCV/half-round-conv.ll --- a/llvm/test/CodeGen/RISCV/half-round-conv.ll +++ 
b/llvm/test/CodeGen/RISCV/half-round-conv.ll @@ -51,9 +51,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB3_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rdn +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rdn +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB3_2: ; RV32IZFH-NEXT: call __fixhfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -115,9 +122,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI7_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB7_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rdn +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rdn +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB7_2: ; RV32IZFH-NEXT: call __fixunshfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -179,9 +193,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call ceilf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI11_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB11_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rup +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rup +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB11_2: ; RV32IZFH-NEXT: call __fixhfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -243,9 +264,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call ceilf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB15_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rup +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rup +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB15_2: ; RV32IZFH-NEXT: call __fixunshfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -307,9 +335,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call truncf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB19_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; 
RV32IZFH-NEXT: fcvt.h.w ft0, a0, rtz +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB19_2: ; RV32IZFH-NEXT: call __fixhfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -371,9 +406,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call truncf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI23_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB23_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rtz +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB23_2: ; RV32IZFH-NEXT: call __fixunshfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -435,9 +477,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI27_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI27_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB27_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rmm +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB27_2: ; RV32IZFH-NEXT: call __fixhfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -499,9 +548,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI31_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI31_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB31_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rmm +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB31_2: ; RV32IZFH-NEXT: call __fixunshfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -563,9 +619,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundevenf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI35_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI35_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB35_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rne +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rne +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB35_2: ; RV32IZFH-NEXT: call __fixhfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -627,9 +690,16 @@ ; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundevenf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI39_0) +; RV32IZFH-NEXT: flh 
ft0, %lo(.LCPI39_0)(a0) +; RV32IZFH-NEXT: fabs.h ft1, fa0 +; RV32IZFH-NEXT: flt.h a0, ft1, ft0 +; RV32IZFH-NEXT: beqz a0, .LBB39_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rne +; RV32IZFH-NEXT: fcvt.h.w ft0, a0, rne +; RV32IZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; RV32IZFH-NEXT: .LBB39_2: ; RV32IZFH-NEXT: call __fixunshfdi@plt ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -666,31 +736,19 @@ ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret -; RV32IZFH-LABEL: test_floor_half: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call floorf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: test_floor_half: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: .cfi_offset ra, -8 -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call floorf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: test_floor_half: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI40_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI40_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB40_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rdn +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rdn +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB40_2: +; CHECKIZFH-NEXT: ret %a = call half @llvm.floor.f16(half %x) ret half %a } @@ -717,31 +775,19 @@ ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret -; RV32IZFH-LABEL: test_ceil_half: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call ceilf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: test_ceil_half: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: .cfi_offset ra, -8 -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call ceilf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: test_ceil_half: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI41_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI41_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB41_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rup +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rup +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB41_2: +; CHECKIZFH-NEXT: ret %a = call half @llvm.ceil.f16(half %x) ret half %a } @@ -768,31 +814,19 @@ ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload 
; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret -; RV32IZFH-LABEL: test_trunc_half: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call truncf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: test_trunc_half: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: .cfi_offset ra, -8 -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call truncf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: test_trunc_half: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI42_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI42_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB42_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rtz +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rtz +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB42_2: +; CHECKIZFH-NEXT: ret %a = call half @llvm.trunc.f16(half %x) ret half %a } @@ -819,31 +853,19 @@ ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret -; RV32IZFH-LABEL: test_round_half: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: test_round_half: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: .cfi_offset ra, -8 -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call roundf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: test_round_half: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI43_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI43_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB43_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rmm +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rmm +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB43_2: +; CHECKIZFH-NEXT: ret %a = call half @llvm.round.f16(half %x) ret half %a } @@ -870,31 +892,19 @@ ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret -; RV32IZFH-LABEL: test_roundeven_half: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: .cfi_offset ra, -4 -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call roundevenf@plt -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: 
ret -; -; RV64IZFH-LABEL: test_roundeven_half: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: .cfi_def_cfa_offset 16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: .cfi_offset ra, -8 -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call roundevenf@plt -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret +; CHECKIZFH-LABEL: test_roundeven_half: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: lui a0, %hi(.LCPI44_0) +; CHECKIZFH-NEXT: flh ft0, %lo(.LCPI44_0)(a0) +; CHECKIZFH-NEXT: fabs.h ft1, fa0 +; CHECKIZFH-NEXT: flt.h a0, ft1, ft0 +; CHECKIZFH-NEXT: beqz a0, .LBB44_2 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0, rne +; CHECKIZFH-NEXT: fcvt.h.w ft0, a0, rne +; CHECKIZFH-NEXT: fsgnj.h fa0, ft0, fa0 +; CHECKIZFH-NEXT: .LBB44_2: +; CHECKIZFH-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) ret half %a }
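
The sequence these updated checks expect in place of the old libcalls follows one pattern throughout: load a threshold from the constant pool, compare it against fabs of the input, and only if the magnitude is below the threshold round through the integer register with a statically encoded rounding mode (rdn, rup, rtz, rmm, rne, or the dynamic mode for rint), then reattach the original sign with fsgnj. The C sketch below is only an illustration of that semantics for the f32 floor case, not the backend code itself; the function name and the 0x1p23f threshold (2^23, the smallest magnitude at which every float is already integral, which is what the .LCPI*_0 loads appear to hold) are assumptions made for the example.

    #include <fenv.h>
    #include <math.h>

    /* Hypothetical model of the inlined llvm.floor.f32 lowering shown in
       the RV32IF/RV64IF checks above. */
    float floor_f32_sketch(float x) {
      const float kMaxIntegralMagnitude = 0x1p23f;  /* assumed .LCPI*_0 value */
      if (fabsf(x) < kMaxIntegralMagnitude) {       /* fabs.s + flt.s + beqz   */
        int old = fegetround();
        fesetround(FE_DOWNWARD);                    /* "rdn" rounding mode     */
        long i = lrintf(x);                         /* fcvt.w.s a0, fa0, rdn   */
        float r = (float)i;                         /* fcvt.s.w ft0, a0, rdn   */
        fesetround(old);
        x = copysignf(r, x);                        /* fsgnj.s: keeps -0.0 and
                                                       the sign of the input   */
      }
      return x;                                     /* NaN and large |x| fall
                                                       through unchanged       */
    }

Values with |x| >= 2^23 (and NaN, for which flt.s yields 0) skip the conversion entirely, which is why the tests only add the guarded basic block rather than an unconditional convert; the other rounding modes in the checks differ only in the mode suffix on the two fcvt instructions.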