diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -0,0 +1,297 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=RV64
+
+; Vector version of test/CodeGen/RISCV/double-round-conv.ll
+
+declare <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double>)
+
+define <vscale x 1 x i64> @trunc_nxv1f64_to_si(<vscale x 1 x double> %x) {
+; RV32-LABEL: trunc_nxv1f64_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI0_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI0_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vfabs.v v9, v8
+; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: trunc_nxv1f64_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI0_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI0_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vfabs.v v9, v8
+; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
+ %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %b
+}
+
+declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
+
+define <1 x i64> @truncv1f64_to_si(<1 x double> %x) {
+; RV32-LABEL: truncv1f64_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI1_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI1_0)(a0)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfabs.v v9, v8
+; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: truncv1f64_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI1_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI1_0)(a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vfabs.v v9, v8
+; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <1 x double> @llvm.trunc.v1f64(<1 x double> %x)
+ %b = fptosi <1 x double> %a to <1 x i64>
+ ret <1 x i64> %b
+}
+
+declare <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double>)
+
+define <vscale x 4 x i64> @trunc_nxv4f64_to_si(<vscale x 4 x double> %x) {
+; RV32-LABEL: trunc_nxv4f64_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI2_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI2_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vfabs.v v12, v8
+; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: trunc_nxv4f64_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI2_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI2_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vfabs.v v12, v8
+; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
+ %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %b
+}
+
+define <vscale x 4 x i64> @trunc_nxv4f64_to_ui(<vscale x 4 x double> %x) {
+; RV32-LABEL: trunc_nxv4f64_to_ui:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI3_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vfabs.v v12, v8
+; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: trunc_nxv4f64_to_ui:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI3_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI3_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vfabs.v v12, v8
+; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
+ %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %b
+}
+
+declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)
+
+define <vscale x 4 x i64> @ceil_nxv4f64_to_si(<vscale x 4 x double> %x) {
+; RV32-LABEL: ceil_nxv4f64_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI4_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI4_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vfabs.v v12, v8
+; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_nxv4f64_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI4_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI4_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vfabs.v v12, v8
+; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
+ %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %b
+}
+
+define <vscale x 4 x i64> @ceil_nxv4f64_to_ui(<vscale x 4 x double> %x) {
+; RV32-LABEL: ceil_nxv4f64_to_ui:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI5_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI5_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vfabs.v v12, v8
+; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_nxv4f64_to_ui:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI5_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI5_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vfabs.v v12, v8
+; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
+ %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %b
+}
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+
+define <4 x i64> @ceil_v4f64_to_si(<4 x double> %x) {
+; RV32-LABEL: ceil_v4f64_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI6_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI6_0)(a0)
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vfabs.v v10, v8
+; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_v4f64_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI6_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI6_0)(a0)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vfabs.v v10, v8
+; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x)
+ %b = fptosi <4 x double> %a to <4 x i64>
+ ret <4 x i64> %b
+}
+
+define <4 x i64> @ceil_v4f64_to_ui(<4 x double> %x) {
+; RV32-LABEL: ceil_v4f64_to_ui:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI7_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vfabs.v v10, v8
+; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_v4f64_to_ui:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI7_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vfabs.v v10, v8
+; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x)
+ %b = fptoui <4 x double> %a to <4 x i64>
+ ret <4 x i64> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -0,0 +1,297 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=RV64
+
+; Vector version of test/CodeGen/RISCV/float-round-conv.ll
+
+declare <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float>)
+
+define <vscale x 1 x i32> @trunc_nxv1f32_to_si(<vscale x 1 x float> %x) {
+; RV32-LABEL: trunc_nxv1f32_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI0_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI0_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfabs.v v9, v8
+; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: trunc_nxv1f32_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI0_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI0_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vfabs.v v9, v8
+; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
+ %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i32>
+ ret <vscale x 1 x i32> %b
+}
+
+declare <1 x float> @llvm.trunc.v1f32(<1 x float>)
+
+define <1 x i32> @truncv1f32_to_si(<1 x float> %x) {
+; RV32-LABEL: truncv1f32_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI1_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfabs.v v9, v8
+; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: truncv1f32_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI1_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vfabs.v v9, v8
+; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <1 x float> @llvm.trunc.v1f32(<1 x float> %x)
+ %b = fptosi <1 x float> %a to <1 x i32>
+ ret <1 x i32> %b
+}
+
+declare <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float>)
+
+define <vscale x 4 x i32> @trunc_nxv4f32_to_si(<vscale x 4 x float> %x) {
+; RV32-LABEL: trunc_nxv4f32_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI2_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI2_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfabs.v v10, v8
+; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: trunc_nxv4f32_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI2_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI2_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vfabs.v v10, v8
+; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
+ %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @trunc_nxv4f32_to_ui(<vscale x 4 x float> %x) {
+; RV32-LABEL: trunc_nxv4f32_to_ui:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI3_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfabs.v v10, v8
+; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: trunc_nxv4f32_to_ui:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI3_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI3_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vfabs.v v10, v8
+; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
+ %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %b
+}
+
+declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
+
+define <vscale x 4 x i32> @ceil_nxv4f32_to_si(<vscale x 4 x float> %x) {
+; RV32-LABEL: ceil_nxv4f32_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI4_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI4_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfabs.v v10, v8
+; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_nxv4f32_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI4_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI4_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vfabs.v v10, v8
+; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
+ %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @ceil_nxv4f32_to_ui(<vscale x 4 x float> %x) {
+; RV32-LABEL: ceil_nxv4f32_to_ui:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI5_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfabs.v v10, v8
+; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_nxv4f32_to_ui:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI5_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vfabs.v v10, v8
+; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
+ %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %b
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+
+define <4 x i32> @ceil_v4f32_to_si(<4 x float> %x) {
+; RV32-LABEL: ceil_v4f32_to_si:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI6_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vfabs.v v9, v8
+; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_v4f32_to_si:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI6_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vfabs.v v9, v8
+; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
+ %b = fptosi <4 x float> %a to <4 x i32>
+ ret <4 x i32> %b
+}
+
+define <4 x i32> @ceil_v4f32_to_ui(<4 x float> %x) {
+; RV32-LABEL: ceil_v4f32_to_ui:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI7_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vfabs.v v9, v8
+; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: fsrmi a0, 3
+; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: fsrm a0
+; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ceil_v4f32_to_ui:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI7_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vfabs.v v9, v8
+; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: fsrmi a0, 3
+; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: fsrm a0
+; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
+ %b = fptoui <4 x float> %a to <4 x i32>
+ ret <4 x i32> %b
+}