diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -239,8 +239,10 @@
   VFCVT_RTZ_X_F_VL,
   VFCVT_RTZ_XU_F_VL,
   VFCVT_X_F_VL,
+  VFCVT_XU_F_VL,
   VFROUND_NOEXCEPT_VL,
   VFCVT_RM_X_F_VL, // Has a rounding mode operand.
+  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
   SINT_TO_FP_VL,
   UINT_TO_FP_VL,
   FP_ROUND_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1914,7 +1914,8 @@
 // Gets the two common "VL" operands: an all-ones mask and the vector length.
 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
-// the vector type that it is contained in.
+// the vector type that the fixed-length vector is contained in. Otherwise if
+// VecVT is scalable, then ContainerVT should be the same as VecVT.
 static std::pair<SDValue, SDValue>
 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                 const RISCVSubtarget &Subtarget) {
@@ -9491,12 +9492,6 @@
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   MVT XLenVT = Subtarget.getXLenVT();
 
-  // Only handle XLen or i32 types. Other types narrower than XLen will
-  // eventually be legalized to XLenVT.
-  EVT VT = N->getValueType(0);
-  if (VT != MVT::i32 && VT != XLenVT)
-    return SDValue();
-
   SDValue Src = N->getOperand(0);
 
   // Ensure the FP type is also legal.
@@ -9511,7 +9506,52 @@
   if (FRM == RISCVFPRndMode::Invalid)
     return SDValue();
 
+  SDLoc DL(N);
   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+  EVT VT = N->getValueType(0);
+
+  if (VT.isVector()) {
+    MVT SrcVT = Src.getSimpleValueType();
+    MVT SrcContainerVT = SrcVT;
+    MVT ContainerVT = VT.getSimpleVT();
+    SDValue XVal = Src.getOperand(0);
+
+    // Make fixed-length vectors scalable first
+    if (SrcVT.isFixedLengthVector()) {
+      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
+      ContainerVT =
+          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
+    }
+
+    auto [Mask, VL] =
+        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+
+    SDValue FpToInt;
+    if (FRM == RISCVFPRndMode::RTZ) {
+      // Use the dedicated trunc static rounding mode if we're truncating so we
+      // don't need to generate calls to fsrmi/fsrm
+      unsigned Opc =
+          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
+      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
+    } else {
+      unsigned Opc =
+          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
+      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
+                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
+    }
+
+    // If converted from fixed-length to scalable, convert back
+    if (VT.isFixedLengthVector())
+      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
+
+    return FpToInt;
+  }
+
+  // Only handle XLen or i32 types. Other types narrower than XLen will
+  // eventually be legalized to XLenVT.
+  if (VT != MVT::i32 && VT != XLenVT)
+    return SDValue();
 
   unsigned Opc;
   if (VT == XLenVT)
@@ -9519,7 +9559,6 @@
   else
     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
 
-  SDLoc DL(N);
   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                 DAG.getTargetConstant(FRM, DL, XLenVT));
   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
@@ -11535,6 +11574,18 @@
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M2_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M4_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M8_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
   case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                      RISCV::PseudoVFCVT_F_X_V_M1_MASK);
@@ -13127,7 +13178,9 @@
   NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
   NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
   NODE_NAME_CASE(VFCVT_RM_X_F_VL)
+  NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
   NODE_NAME_CASE(VFCVT_X_F_VL)
+  NODE_NAME_CASE(VFCVT_XU_F_VL)
   NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
   NODE_NAME_CASE(SINT_TO_FP_VL)
   NODE_NAME_CASE(UINT_TO_FP_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -5489,6 +5489,7 @@
 }
 defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
 defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V;
 defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
 defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V;
 let Uses = [FRM] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -153,7 +153,9 @@
                                        ]>;
 def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
+def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
 def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
 def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
 def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
@@ -1708,7 +1710,9 @@
   // 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
   defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F">;
+  defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F">;
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F">;
+  defm : VPatConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F">;
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F">;
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F">;
   defm : VPatConvertI2FPVL_V<sint_to_fp_vl, "PseudoVFCVT_F_X">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | \
+; RUN:   FileCheck %s -check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | \
+; RUN:   FileCheck %s -check-prefix=RV64
+
+; Vector version of test/CodeGen/RISCV/double-round-conv.ll
+
+declare <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double>)
+
+define <vscale x 1 x i64> @trunc_nxv1f64_to_si(<vscale x 1 x double> %x) {
+; RV32-LABEL: trunc_nxv1f64_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: trunc_nxv1f64_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
+  %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
+}
+
+declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
+
+define <1 x i64> @truncv1f64_to_si(<1 x double> %x) {
+; RV32-LABEL: truncv1f64_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: truncv1f64_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <1 x double> @llvm.trunc.v1f64(<1 x double> %x)
+  %b = fptosi <1 x double> %a to <1 x i64>
+  ret <1 x i64> %b
+}
+
+declare <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double>)
+
+define <vscale x 4 x i64> @trunc_nxv4f64_to_si(<vscale x 4 x double> %x) {
+; RV32-LABEL: trunc_nxv4f64_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: trunc_nxv4f64_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
+  %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %b
+}
+
+define <vscale x 4 x i64> @trunc_nxv4f64_to_ui(<vscale x 4 x double> %x) {
+; RV32-LABEL: trunc_nxv4f64_to_ui:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: trunc_nxv4f64_to_ui:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
+  %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %b
+}
+
+declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)
+
+define <vscale x 4 x i64> @ceil_nxv4f64_to_si(<vscale x 4 x double> %x) {
+; RV32-LABEL: ceil_nxv4f64_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv4f64_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
+  %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %b
+}
+
+define <vscale x 4 x i64> @ceil_nxv4f64_to_ui(<vscale x 4 x double> %x) {
+; RV32-LABEL: ceil_nxv4f64_to_ui:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv4f64_to_ui:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
+  %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %b
+}
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+
+define <4 x i64> @ceil_v4f64_to_si(<4 x double> %x) {
+; RV32-LABEL: ceil_v4f64_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v4f64_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x)
+  %b = fptosi <4 x double> %a to <4 x i64>
+  ret <4 x i64> %b
+}
+
+define <4 x i64> @ceil_v4f64_to_ui(<4 x double> %x) {
+; RV32-LABEL: ceil_v4f64_to_ui:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v4f64_to_ui:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x)
+  %b = fptoui <4 x double> %a to <4 x i64>
+  ret <4 x i64> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | \
+; RUN:   FileCheck %s -check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | \
+; RUN:   FileCheck %s -check-prefix=RV64
+
+; Vector version of test/CodeGen/RISCV/float-round-conv.ll
+
+declare <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float>)
+
+define <vscale x 1 x i32> @trunc_nxv1f32_to_si(<vscale x 1 x float> %x) {
+; RV32-LABEL: trunc_nxv1f32_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: trunc_nxv1f32_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
+  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i32>
+  ret <vscale x 1 x i32> %b
+}
+
+declare <1 x float> @llvm.trunc.v1f32(<1 x float>)
+
+define <1 x i32> @truncv1f32_to_si(<1 x float> %x) {
+; RV32-LABEL: truncv1f32_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: truncv1f32_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <1 x float> @llvm.trunc.v1f32(<1 x float> %x)
+  %b = fptosi <1 x float> %a to <1 x i32>
+  ret <1 x i32> %b
+}
+
+declare <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float>)
+
+define <vscale x 4 x i32> @trunc_nxv4f32_to_si(<vscale x 4 x float> %x) {
+; RV32-LABEL: trunc_nxv4f32_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: trunc_nxv4f32_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
+  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @trunc_nxv4f32_to_ui(<vscale x 4 x float> %x) {
+; RV32-LABEL: trunc_nxv4f32_to_ui:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: trunc_nxv4f32_to_ui:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
+  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %b
+}
+
+declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
+
+define <vscale x 4 x i32> @ceil_nxv4f32_to_si(<vscale x 4 x float> %x) {
+; RV32-LABEL: ceil_nxv4f32_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv4f32_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
+  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @ceil_nxv4f32_to_ui(<vscale x 4 x float> %x) {
+; RV32-LABEL: ceil_nxv4f32_to_ui:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv4f32_to_ui:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
+  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %b
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+
+define <4 x i32> @ceil_v4f32_to_si(<4 x float> %x) {
+; RV32-LABEL: ceil_v4f32_to_si:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v4f32_to_si:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
+  %b = fptosi <4 x float> %a to <4 x i32>
+  ret <4 x i32> %b
+}
+
+define <4 x i32> @ceil_v4f32_to_ui(<4 x float> %x) {
+; RV32-LABEL: ceil_v4f32_to_ui:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmset.m v0
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v4f32_to_ui:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmset.m v0
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    ret
+  %a = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
+  %b = fptoui <4 x float> %a to <4 x i32>
+  ret <4 x i32> %b
+}
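
Usage sketch only (not part of the patch or its autogenerated tests): assuming the rounding-mode matching feeding this combine also maps llvm.floor to RDN the same way it maps llvm.ceil to RUP, an fptoui of a floored value should likewise lower to a single vfcvt.xu.f.v under a static frm of 2, with no libm call. The function name below is hypothetical and the expected assembly in the comment is an assumption, not an autogenerated check.

; Hypothetical illustration: expected lowering is roughly
;   vsetvli ...; vmset.m v0; fsrmi a0, 2; vfcvt.xu.f.v v8, v8, v0.t; fsrm a0
declare <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float>)

define <vscale x 2 x i32> @floor_nxv2f32_to_ui(<vscale x 2 x float> %x) {
  %a = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> %x)
  %b = fptoui <vscale x 2 x float> %a to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %b
}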