diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -254,6 +254,7 @@
   FCOPYSIGN_VL, // Has a merge operand
   FP_TO_SINT_VL,
   FP_TO_UINT_VL,
+  VFCVT_X_F_VL,
   SINT_TO_FP_VL,
   UINT_TO_FP_VL,
   FP_ROUND_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1839,8 +1839,6 @@
 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
 // and back. Taking care to avoid converting values that are nan or already
 // correct.
-// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
-// have FRM dependencies modeled yet.
 static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -1887,41 +1885,30 @@
   // Truncate to integer and convert back to FP.
   MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
-  SDValue Truncated =
-      DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
-  Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
-                          Mask, VL);
+  MVT XLenVT = Subtarget.getXLenVT();
+  SDValue Truncated;
 
-  if (Op.getOpcode() == ISD::FCEIL) {
-    // If the truncated value is the greater than or equal to the original
-    // value, we've computed the ceil. Otherwise, we went the wrong way and
-    // need to increase by 1.
-    // FIXME: This should use a masked operation. Handle here or in isel?
-    SDValue SplatVal =
-        DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
-    SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
-                                DAG.getUNDEF(ContainerVT), SplatVal, VL);
-    SDValue NeedAdjust = DAG.getNode(
-        RISCVISD::SETCC_VL, DL, SetccVT,
-        {Truncated, Src, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
-    Truncated = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Truncated,
-                            Splat, Truncated, NeedAdjust, VL);
-  } else if (Op.getOpcode() == ISD::FFLOOR) {
-    // If the truncated value is the less than or equal to the original value,
-    // we've computed the floor. Otherwise, we went the wrong way and need to
-    // decrease by 1.
-    // FIXME: This should use a masked operation. Handle here or in isel?
-    SDValue SplatVal =
-        DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
-    SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
-                                DAG.getUNDEF(ContainerVT), SplatVal, VL);
-    SDValue NeedAdjust = DAG.getNode(
-        RISCVISD::SETCC_VL, DL, SetccVT,
-        {Src, Truncated, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
-    Truncated = DAG.getNode(RISCVISD::FSUB_VL, DL, ContainerVT, Truncated,
-                            Splat, Truncated, NeedAdjust, VL);
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected opcode");
+  case ISD::FCEIL:
+    Truncated =
+        DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
+                    DAG.getTargetConstant(RISCVFPRndMode::RUP, DL, XLenVT), VL);
+    break;
+  case ISD::FFLOOR:
+    Truncated =
+        DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
+                    DAG.getTargetConstant(RISCVFPRndMode::RDN, DL, XLenVT), VL);
+    break;
+  case ISD::FTRUNC:
+    Truncated = DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
+    break;
   }
 
+  Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
+                          Mask, VL);
+
+  // Restore the original sign so that -0.0 is preserved.
   Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                           Src, Src, Mask, VL);
@@ -10664,6 +10651,41 @@
   return TailMBB;
 }
 
+static MachineBasicBlock *
+emitVFCVT_RM_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) {
+  DebugLoc DL = MI.getDebugLoc();
+
+  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+
+  // Update FRM and save the old value.
+  BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
+      .addImm(MI.getOperand(4).getImm());
+
+  // Emit a VFCVT without the FRM operand.
+  assert(MI.getNumOperands() == 8);
+  auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode))
+                 .add(MI.getOperand(0))
+                 .add(MI.getOperand(1))
+                 .add(MI.getOperand(2))
+                 .add(MI.getOperand(3))
+                 .add(MI.getOperand(5))
+                 .add(MI.getOperand(6))
+                 .add(MI.getOperand(7));
+  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
+    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+  // Restore FRM.
+  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
+      .addReg(SavedFRM, RegState::Kill);
+
+  // Erase the pseudoinstruction.
+  MI.eraseFromParent();
+  return BB;
+}
+
 MachineBasicBlock *
 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
@@ -10695,6 +10717,18 @@
     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
   case RISCV::PseudoQuietFLT_D:
     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
+  case RISCV::PseudoVFCVT_RM_X_F_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
+  case RISCV::PseudoVFCVT_RM_X_F_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
+  case RISCV::PseudoVFCVT_RM_X_F_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
+  case RISCV::PseudoVFCVT_RM_X_F_V_M8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
+  case RISCV::PseudoVFCVT_RM_X_F_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
+  case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
   }
 }
@@ -12242,6 +12276,7 @@
   NODE_NAME_CASE(MULHU_VL)
   NODE_NAME_CASE(FP_TO_SINT_VL)
   NODE_NAME_CASE(FP_TO_UINT_VL)
+  NODE_NAME_CASE(VFCVT_X_F_VL)
   NODE_NAME_CASE(SINT_TO_FP_VL)
   NODE_NAME_CASE(UINT_TO_FP_VL)
   NODE_NAME_CASE(FP_EXTEND_VL)
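Not part of the diff: a scalar C++ analogue of the sequence the new lowering emits, to make the rounding-mode trick easier to follow. fesetround(FE_UPWARD) stands in for fsrmi with RISCVFPRndMode::RUP (floor would use FE_DOWNWARD/RDN), the magnitude check mirrors the vmflt.vf mask that leaves NaNs and already-integral values unconverted, and the copysign mirrors the final vfsgnj.vv. The helper name and the 2^23 threshold for float are illustrative assumptions, not code from this change.

#include <cfenv>
#include <cmath>

// Sketch only: ceil(X) computed the way the vector lowering now does it.
// (Strictly, FENV_ACCESS must be enabled for fesetround to take effect.)
static float ceil_via_rounding_mode(float X) {
  // Elements that are NaN or have no fractional part (|X| >= 2^23 for float)
  // are left alone; this is what the vmflt.vf mask guards in the vector code.
  if (std::isnan(X) || std::fabs(X) >= 0x1p23f)
    return X;
  int OldMode = std::fegetround();       // fsrmi also hands back the old FRM
  std::fesetround(FE_UPWARD);            // fsrmi a0, 3 (RUP); floor uses RDN
  float Rounded = (float)std::lrintf(X); // vfcvt.x.f.v then vfcvt.f.x.v
  std::fesetround(OldMode);              // fsrm a0
  return std::copysign(Rounded, X);      // vfsgnj.vv keeps the sign of -0.0
}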
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1555,6 +1555,7 @@
 def ReadFRM : ReadSysReg<SysRegFRM, [FRM]>;
 def WriteFRM : WriteSysReg<SysRegFRM, [FRM]>;
 def WriteFRMImm : WriteSysRegImm<SysRegFRM, [FRM]>;
+def SwapFRMImm : SwapSysRegImm<SysRegFRM, [FRM]>;
 
 let hasSideEffects = true in {
 def ReadFFLAGS : ReadSysReg<SysRegFFLAGS, [FFLAGS]>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1031,6 +1031,22 @@
   let UsesMaskPolicy = 1;
 }
 
+class VPseudoUnaryMaskTA_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
+        Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+               (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+                    VMaskOp:$vm, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasMergeOp = 1;
+  let HasVecPolicyOp = 1;
+  let UsesMaskPolicy = 1;
+  let usesCustomInserter = 1;
+}
+
 // mask unary operation without maskedoff
 class VPseudoMaskUnarySOutMask:
         Pseudo<(outs GPR:$rd),
@@ -2769,12 +2785,28 @@
   }
 }
 
+multiclass VPseudoConversionRM<VReg RetClass,
+                               VReg Op1Class,
+                               LMULInfo MInfo,
+                               string Constraint = ""> {
+  let VLMul = MInfo.value in {
+    def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA_FRM<RetClass, Op1Class,
+                                                          Constraint>;
+  }
+}
+
 multiclass VPseudoVCVTI_V {
   foreach m = MxListF in
     defm _V : VPseudoConversion<VR, VR, m>,
               Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
 }
 
+multiclass VPseudoVCVTI_RM_V {
+  foreach m = MxListF in
+    defm _V : VPseudoConversionRM<VR, VR, m>,
+              Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
+}
+
 multiclass VPseudoVCVTF_V {
   foreach m = MxListF in
     defm _V : VPseudoConversion<VR, VR, m>,
@@ -4849,6 +4881,7 @@
 }
 defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
 defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
 let Uses = [FRM] in {
 defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
 defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
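An illustrative aside, not part of the diff: the operand order declared above for the RM-mask pseudo ($rd as the def, then $merge, $rs2, $vm, $frm, $vl, $sew, $policy) is exactly the layout emitVFCVT_RM_MASK relies on when it reads MI.getOperand(4).getImm() and re-emits operands 0-3 and 5-7 on the FRM-less pseudo. A hypothetical helper, named here only for illustration, makes the indexing explicit:

#include <cassert>
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper (not in the patch): fetch the static rounding mode from
// one of the PseudoVFCVT_RM_X_F_V_*_MASK instructions defined above.
static int64_t getStaticFRM(const llvm::MachineInstr &MI) {
  assert(MI.getNumOperands() == 8 &&
         "expected $rd, $merge, $rs2, $vm, $frm, $vl, $sew, $policy");
  return MI.getOperand(4).getImm();
}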
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -146,6 +146,14 @@
 def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 
+def SDT_RISCVVecCvtX2FOp_VL : SDTypeProfile<1, 4, [
+  SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
+  SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
+  SDTCisVT<4, XLenVT>
+]>;
+
+def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVVecCvtX2FOp_VL>;
+
 def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
                             SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
                                                  SDTCisVec<1>,
@@ -645,6 +653,19 @@
   }
 }
 
+multiclass VPatConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
+  foreach fvti = AllFloatVectors in {
+    defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+    def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
+                                (fvti.Mask V0), (XLenVT timm:$frm),
+                                VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+                  (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                  (fvti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW,
+                  TAIL_AGNOSTIC)>;
+  }
+}
+
 multiclass VPatConvertI2FPVL_V<SDNode vop, string instruction_name> {
   foreach fvti = AllFloatVectors in {
     defvar ivti = GetIntVTypeInfo<fvti>.Vti;
@@ -1471,6 +1492,7 @@
                 GPR:$vl, fvti.Log2SEW)>;
 
   // 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
+  defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
  defm : VPatConvertFP2IVL_V<riscv_fp_to_sint_vl, "PseudoVFCVT_RTZ_X_F_V">;
  defm : VPatConvertFP2IVL_V<riscv_fp_to_uint_vl, "PseudoVFCVT_RTZ_XU_F_V">;
  defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -264,38 +264,38 @@
   // instruction counts with the following adjustments made:
   // * One vsetvli is considered free.
   static const CostTblEntry VectorIntrinsicCostTable[]{
-      {Intrinsic::floor, MVT::v2f32, 15},
-      {Intrinsic::floor, MVT::v4f32, 15},
-      {Intrinsic::floor, MVT::v8f32, 15},
-      {Intrinsic::floor, MVT::v16f32, 15},
-      {Intrinsic::floor, MVT::nxv2f32, 15},
-      {Intrinsic::floor, MVT::nxv4f32, 15},
-      {Intrinsic::floor, MVT::nxv8f32, 15},
-      {Intrinsic::floor, MVT::nxv16f32, 15},
-      {Intrinsic::floor, MVT::v2f64, 15},
-      {Intrinsic::floor, MVT::v4f64, 15},
-      {Intrinsic::floor, MVT::v8f64, 15},
-      {Intrinsic::floor, MVT::v16f64, 15},
-      {Intrinsic::floor, MVT::nxv1f64, 15},
-      {Intrinsic::floor, MVT::nxv2f64, 15},
-      {Intrinsic::floor, MVT::nxv4f64, 15},
-      {Intrinsic::floor, MVT::nxv8f64, 15},
-      {Intrinsic::ceil, MVT::v2f32, 15},
-      {Intrinsic::ceil, MVT::v4f32, 15},
-      {Intrinsic::ceil, MVT::v8f32, 15},
-      {Intrinsic::ceil, MVT::v16f32, 15},
-      {Intrinsic::ceil, MVT::nxv2f32, 15},
-      {Intrinsic::ceil, MVT::nxv4f32, 15},
-      {Intrinsic::ceil, MVT::nxv8f32, 15},
-      {Intrinsic::ceil, MVT::nxv16f32, 15},
-      {Intrinsic::ceil, MVT::v2f64, 15},
-      {Intrinsic::ceil, MVT::v4f64, 15},
-      {Intrinsic::ceil, MVT::v8f64, 15},
-      {Intrinsic::ceil, MVT::v16f64, 15},
-      {Intrinsic::ceil, MVT::nxv1f64, 15},
-      {Intrinsic::ceil, MVT::nxv2f64, 15},
-      {Intrinsic::ceil, MVT::nxv4f64, 15},
-      {Intrinsic::ceil, MVT::nxv8f64, 15},
+      {Intrinsic::floor, MVT::v2f32, 9},
+      {Intrinsic::floor, MVT::v4f32, 9},
+      {Intrinsic::floor, MVT::v8f32, 9},
+      {Intrinsic::floor, MVT::v16f32, 9},
+      {Intrinsic::floor, MVT::nxv2f32, 9},
+      {Intrinsic::floor, MVT::nxv4f32, 9},
+      {Intrinsic::floor, MVT::nxv8f32, 9},
+      {Intrinsic::floor, MVT::nxv16f32, 9},
+      {Intrinsic::floor, MVT::v2f64, 9},
+      {Intrinsic::floor, MVT::v4f64, 9},
+      {Intrinsic::floor, MVT::v8f64, 9},
+      {Intrinsic::floor, MVT::v16f64, 9},
+      {Intrinsic::floor, MVT::nxv1f64, 9},
+      {Intrinsic::floor, MVT::nxv2f64, 9},
+      {Intrinsic::floor, MVT::nxv4f64, 9},
+      {Intrinsic::floor, MVT::nxv8f64, 9},
+      {Intrinsic::ceil, MVT::v2f32, 9},
+      {Intrinsic::ceil, MVT::v4f32, 9},
+      {Intrinsic::ceil, MVT::v8f32, 9},
+      {Intrinsic::ceil, MVT::v16f32, 9},
+      {Intrinsic::ceil, MVT::nxv2f32, 9},
+      {Intrinsic::ceil, MVT::nxv4f32, 9},
+      {Intrinsic::ceil, MVT::nxv8f32, 9},
+      {Intrinsic::ceil, MVT::nxv16f32, 9},
+      {Intrinsic::ceil, MVT::v2f64, 9},
+      {Intrinsic::ceil, MVT::v4f64, 9},
+      {Intrinsic::ceil, MVT::v8f64, 9},
+      {Intrinsic::ceil, MVT::v16f64, 9},
+      {Intrinsic::ceil, MVT::nxv1f64, 9},
+      {Intrinsic::ceil, MVT::nxv2f64, 9},
+      {Intrinsic::ceil, MVT::nxv4f64, 9},
+      {Intrinsic::ceil, MVT::nxv8f64, 9},
       {Intrinsic::trunc, MVT::v2f32, 7},
       {Intrinsic::trunc, MVT::v4f32, 7},
       {Intrinsic::trunc, MVT::v8f32, 7},
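A cross-check on the new table value rather than part of the diff: with the single vsetvli treated as free (per the comment above), the updated expansion shown in the codegen tests below is nine instructions per vector ceil/floor, which is where the 15 -> 9 change comes from. The instruction names are taken from the updated CHECK lines; the array itself is purely illustrative.

// Illustrative only: the nine instructions counted for the new ceil/floor
// expansion (the vsetvli is considered free by this cost table).
constexpr const char *CeilFloorExpansion[] = {
    "lui",         // address of the 2^(mantissa bits) constant pool entry
    "flh/flw/fld", // load that constant
    "vfabs.v",     // |x|
    "vmflt.vf",    // mask: only elements with |x| < 2^(mantissa bits) convert
    "fsrmi",       // set FRM to RUP (ceil) or RDN (floor), saving the old value
    "vfcvt.x.f.v", // float -> integer under the static rounding mode
    "fsrm",        // restore FRM
    "vfcvt.f.x.v", // integer -> float
    "vfsgnj.vv",   // reapply the original sign so -0.0 survives
};
static_assert(sizeof(CeilFloorExpansion) / sizeof(CeilFloorExpansion[0]) == 9,
              "cost of 9 matches the sequence length");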
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -4,23 +4,23 @@
 define void @floor() {
 ; CHECK-LABEL: 'floor'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.floor.f32(float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %8 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %9 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call double @llvm.floor.f64(double undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %11 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %12 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %14 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %15 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %16 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %17 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %18 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   call float @llvm.floor.f32(float undef)
@@ -47,23 +47,23 @@
 define void @ceil() {
 ; CHECK-LABEL: 'ceil'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.ceil.f32(float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %8 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %9 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call double @llvm.ceil.f64(double undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %11 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %12 = call <4 x double>
@llvm.ceil.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call @llvm.ceil.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.ceil.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.ceil.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.ceil.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; call float @llvm.ceil.f32(float undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -11,18 +11,12 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI0_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv1f16( %x) ret %a @@ -36,18 +30,12 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI1_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv2f16( %x) ret %a @@ -61,18 +49,12 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI2_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f16( %x) ret %a @@ -85,19 +67,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v10, v12, ft0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI3_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f16( %x) ret %a @@ -110,19 +86,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v12, v16, ft0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI4_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv16f16( %x) ret %a @@ -135,19 +105,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v16, v24, ft0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI5_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv32f16( %x) ret %a @@ -161,18 +125,12 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI6_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; 
CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv1f32( %x) ret %a @@ -186,18 +144,12 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI7_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv2f32( %x) ret %a @@ -210,19 +162,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v10, v12, ft0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI8_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f32( %x) ret %a @@ -235,19 +181,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v12, v16, ft0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI9_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f32( %x) ret %a @@ -260,19 +200,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v16, v24, ft0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI10_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v16 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv16f32( %x) ret %a @@ -286,18 +220,12 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI11_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv1f64( %x) ret %a @@ -310,19 +238,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v10, v12, ft0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI12_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv2f64( %x) ret %a @@ -335,19 +257,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v12, v16, ft0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI13_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f64( %x) ret %a @@ -360,19 +276,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v16, v24, ft0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI14_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vmv1r.v 
v17, v16 -; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -11,18 +11,12 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI0_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv1f16( %x) ret %a @@ -36,18 +30,12 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI1_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv2f16( %x) ret %a @@ -61,18 +49,12 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI2_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f16( %x) ret %a @@ -85,19 +67,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v10, v12, ft0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: 
vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI3_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f16( %x) ret %a @@ -110,19 +86,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v12, v16, ft0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI4_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv16f16( %x) ret %a @@ -135,19 +105,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v16, v24, ft0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI5_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv32f16( %x) ret %a @@ -161,18 +125,12 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI6_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv1f32( %x) ret %a @@ -186,18 +144,12 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: 
vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI7_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv2f32( %x) ret %a @@ -210,19 +162,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v10, v12, ft0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI8_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f32( %x) ret %a @@ -235,19 +181,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v12, v16, ft0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI9_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f32( %x) ret %a @@ -260,19 +200,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v16, v24, ft0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI10_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv16f32( 
%x) ret %a @@ -286,18 +220,12 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v9, v9, ft0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI11_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v10, v9 -; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv1f64( %x) ret %a @@ -310,19 +238,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v10, v12, ft0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI12_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv2f64( %x) ret %a @@ -335,19 +257,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v12, v16, ft0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI13_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f64( %x) ret %a @@ -360,19 +276,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v16, v24, ft0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t -; CHECK-NEXT: lui a0, %hi(.LCPI14_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0) -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; 
CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -2030,23 +2030,17 @@ ; CHECK-LABEL: ceil_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI94_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI94_0)(a1) -; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: vmflt.vf v8, v8, ft0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI94_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI94_1)(a1) -; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t -; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x half>, <8 x half>* %x %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a) @@ -2059,23 +2053,17 @@ ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI95_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI95_0)(a1) -; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: vmflt.vf v8, v8, ft0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI95_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI95_1)(a1) -; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t -; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) @@ -2088,23 +2076,17 @@ ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vle64.v v10, (a0) +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI96_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI96_0)(a1) -; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: vmflt.vf v8, v8, ft0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI96_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI96_1)(a1) -; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t -; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; 
CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) @@ -2117,23 +2099,17 @@ ; CHECK-LABEL: floor_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI97_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI97_0)(a1) -; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: vmflt.vf v8, v8, ft0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI97_1) -; CHECK-NEXT: flh ft0, %lo(.LCPI97_1)(a1) -; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t -; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x half>, <8 x half>* %x %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a) @@ -2146,23 +2122,17 @@ ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI98_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI98_0)(a1) -; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: vmflt.vf v8, v8, ft0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI98_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI98_1)(a1) -; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t -; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) @@ -2175,23 +2145,17 @@ ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vle64.v v10, (a0) +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI99_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI99_0)(a1) -; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: vmflt.vf v8, v8, ft0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI99_1) -; CHECK-NEXT: fld ft0, %lo(.LCPI99_1)(a1) -; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t -; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 
+; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)