diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll @@ -0,0 +1,1309 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=RV64 + +; ================================================================================ +; trunc +; ================================================================================ + +declare @llvm.trunc.nxv1f64() + +define @trunc_nxv1f64_to_si8( %x) { +; RV32-LABEL: trunc_nxv1f64_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI0_0) +; RV32-NEXT: fld ft0, %lo(.LCPI0_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI0_0) +; RV64-NEXT: fld ft0, %lo(.LCPI0_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f64_to_ui8( %x) { +; RV32-LABEL: trunc_nxv1f64_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI1_0) +; RV32-NEXT: fld ft0, %lo(.LCPI1_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI1_0) +; RV64-NEXT: fld ft0, %lo(.LCPI1_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f64_to_si16( %x) { +; RV32-LABEL: trunc_nxv1f64_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI2_0) +; RV32-NEXT: fld ft0, %lo(.LCPI2_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI2_0) +; RV64-NEXT: fld ft0, %lo(.LCPI2_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f64_to_ui16( %x) { +; RV32-LABEL: trunc_nxv1f64_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI3_0) +; RV32-NEXT: fld ft0, %lo(.LCPI3_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI3_0) +; RV64-NEXT: fld ft0, %lo(.LCPI3_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f64_to_si32( %x) { +; RV32-LABEL: trunc_nxv1f64_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI4_0) +; RV32-NEXT: fld ft0, %lo(.LCPI4_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI4_0) +; RV64-NEXT: fld ft0, %lo(.LCPI4_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f64_to_ui32( %x) { +; RV32-LABEL: trunc_nxv1f64_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI5_0) +; RV32-NEXT: fld ft0, %lo(.LCPI5_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI5_0) +; RV64-NEXT: fld ft0, %lo(.LCPI5_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f64_to_si64( %x) { +; RV32-LABEL: trunc_nxv1f64_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI6_0) +; RV32-NEXT: fld ft0, %lo(.LCPI6_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI6_0) +; RV64-NEXT: fld ft0, %lo(.LCPI6_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f64_to_ui64( %x) { +; RV32-LABEL: trunc_nxv1f64_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI7_0) +; RV32-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f64_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI7_0) +; RV64-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; trunc +; ================================================================================ + +declare @llvm.trunc.nxv4f64() + +define @trunc_nxv4f64_to_si8( %x) { +; RV32-LABEL: trunc_nxv4f64_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI8_0) +; RV32-NEXT: fld ft0, %lo(.LCPI8_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI8_0) +; RV64-NEXT: fld ft0, %lo(.LCPI8_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f64_to_ui8( %x) { +; RV32-LABEL: trunc_nxv4f64_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI9_0) +; RV32-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI9_0) +; RV64-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f64_to_si16( %x) { +; RV32-LABEL: trunc_nxv4f64_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI10_0) +; RV32-NEXT: fld ft0, %lo(.LCPI10_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI10_0) +; RV64-NEXT: fld ft0, %lo(.LCPI10_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f64_to_ui16( %x) { +; RV32-LABEL: trunc_nxv4f64_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI11_0) +; RV64-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f64_to_si32( %x) { +; RV32-LABEL: trunc_nxv4f64_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI12_0) +; RV64-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f64_to_ui32( %x) { +; RV32-LABEL: trunc_nxv4f64_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI13_0) +; RV64-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f64_to_si64( %x) { +; RV32-LABEL: trunc_nxv4f64_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: fld ft0, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI14_0) +; RV64-NEXT: fld ft0, %lo(.LCPI14_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f64_to_ui64( %x) { +; RV32-LABEL: trunc_nxv4f64_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI15_0) +; RV32-NEXT: fld ft0, %lo(.LCPI15_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f64_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI15_0) +; RV64-NEXT: fld ft0, %lo(.LCPI15_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; ceil +; ================================================================================ + +declare @llvm.ceil.nxv1f64() + +define @ceil_nxv1f64_to_si8( %x) { +; RV32-LABEL: ceil_nxv1f64_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI16_0) +; RV32-NEXT: fld ft0, %lo(.LCPI16_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI16_0) +; RV64-NEXT: fld ft0, %lo(.LCPI16_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f64_to_ui8( %x) { +; RV32-LABEL: ceil_nxv1f64_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI17_0) +; RV32-NEXT: fld ft0, %lo(.LCPI17_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI17_0) +; RV64-NEXT: fld ft0, %lo(.LCPI17_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f64_to_si16( %x) { +; RV32-LABEL: ceil_nxv1f64_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI18_0) +; RV32-NEXT: fld ft0, %lo(.LCPI18_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: fld ft0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f64_to_ui16( %x) { +; RV32-LABEL: ceil_nxv1f64_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI19_0) +; RV32-NEXT: fld ft0, %lo(.LCPI19_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: fld ft0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f64_to_si32( %x) { +; RV32-LABEL: ceil_nxv1f64_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI20_0) +; RV32-NEXT: fld ft0, %lo(.LCPI20_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: fld ft0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f64_to_ui32( %x) { +; RV32-LABEL: ceil_nxv1f64_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI21_0) +; RV32-NEXT: fld ft0, %lo(.LCPI21_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: fld ft0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f64_to_si64( %x) { +; RV32-LABEL: ceil_nxv1f64_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-NEXT: fld ft0, %lo(.LCPI22_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI22_0) +; RV64-NEXT: fld ft0, %lo(.LCPI22_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f64_to_ui64( %x) { +; RV32-LABEL: ceil_nxv1f64_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI23_0) +; RV32-NEXT: fld ft0, %lo(.LCPI23_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f64_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI23_0) +; RV64-NEXT: fld ft0, %lo(.LCPI23_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; ceil +; ================================================================================ + +declare @llvm.ceil.nxv4f64() + +define @ceil_nxv4f64_to_si8( %x) { +; RV32-LABEL: ceil_nxv4f64_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI24_0) +; RV32-NEXT: fld ft0, %lo(.LCPI24_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI24_0) +; RV64-NEXT: fld ft0, %lo(.LCPI24_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f64_to_ui8( %x) { +; RV32-LABEL: ceil_nxv4f64_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI25_0) +; RV32-NEXT: fld ft0, %lo(.LCPI25_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI25_0) +; RV64-NEXT: fld ft0, %lo(.LCPI25_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f64_to_si16( %x) { +; RV32-LABEL: ceil_nxv4f64_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI26_0) +; RV32-NEXT: fld ft0, %lo(.LCPI26_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI26_0) +; RV64-NEXT: fld ft0, %lo(.LCPI26_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f64_to_ui16( %x) { +; RV32-LABEL: ceil_nxv4f64_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI27_0) +; RV32-NEXT: fld ft0, %lo(.LCPI27_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI27_0) +; RV64-NEXT: fld ft0, %lo(.LCPI27_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f64_to_si32( %x) { +; RV32-LABEL: ceil_nxv4f64_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI28_0) +; RV32-NEXT: fld ft0, %lo(.LCPI28_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI28_0) +; RV64-NEXT: fld ft0, %lo(.LCPI28_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f64_to_ui32( %x) { +; RV32-LABEL: ceil_nxv4f64_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI29_0) +; RV32-NEXT: fld ft0, %lo(.LCPI29_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI29_0) +; RV64-NEXT: fld ft0, %lo(.LCPI29_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f64_to_si64( %x) { +; RV32-LABEL: ceil_nxv4f64_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI30_0) +; RV32-NEXT: fld ft0, %lo(.LCPI30_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI30_0) +; RV64-NEXT: fld ft0, %lo(.LCPI30_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f64_to_ui64( %x) { +; RV32-LABEL: ceil_nxv4f64_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI31_0) +; RV32-NEXT: fld ft0, %lo(.LCPI31_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f64_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI31_0) +; RV64-NEXT: fld ft0, %lo(.LCPI31_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + %b = fptoui %a to + ret %b +} diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll @@ -0,0 +1,1245 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=RV64 + +; ================================================================================ +; trunc +; ================================================================================ + +declare @llvm.trunc.nxv1f32() + +define @trunc_nxv1f32_to_si8( %x) { +; RV32-LABEL: trunc_nxv1f32_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI0_0) +; RV32-NEXT: flw ft0, %lo(.LCPI0_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI0_0) +; RV64-NEXT: flw ft0, %lo(.LCPI0_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f32_to_ui8( %x) { +; RV32-LABEL: trunc_nxv1f32_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI1_0) +; RV32-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI1_0) +; RV64-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f32_to_si16( %x) { +; RV32-LABEL: trunc_nxv1f32_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI2_0) +; RV32-NEXT: flw ft0, %lo(.LCPI2_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI2_0) +; RV64-NEXT: flw ft0, %lo(.LCPI2_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f32_to_ui16( %x) { +; RV32-LABEL: trunc_nxv1f32_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI3_0) +; RV32-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI3_0) +; RV64-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f32_to_si32( %x) { +; RV32-LABEL: trunc_nxv1f32_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI4_0) +; RV32-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI4_0) +; RV64-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f32_to_ui32( %x) { +; RV32-LABEL: trunc_nxv1f32_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI5_0) +; RV32-NEXT: flw ft0, %lo(.LCPI5_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI5_0) +; RV64-NEXT: flw ft0, %lo(.LCPI5_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f32_to_si64( %x) { +; RV32-LABEL: trunc_nxv1f32_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI6_0) +; RV32-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI6_0) +; RV64-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f32_to_ui64( %x) { +; RV32-LABEL: trunc_nxv1f32_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI7_0) +; RV32-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f32_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI7_0) +; RV64-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; trunc +; ================================================================================ + +declare @llvm.trunc.nxv4f32() + +define @trunc_nxv4f32_to_si8( %x) { +; RV32-LABEL: trunc_nxv4f32_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI8_0) +; RV32-NEXT: flw ft0, %lo(.LCPI8_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v10, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI8_0) +; RV64-NEXT: flw ft0, %lo(.LCPI8_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v10, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f32_to_ui8( %x) { +; RV32-LABEL: trunc_nxv4f32_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI9_0) +; RV32-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v10, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI9_0) +; RV64-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v10, 0 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f32_to_si16( %x) { +; RV32-LABEL: trunc_nxv4f32_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI10_0) +; RV32-NEXT: flw ft0, %lo(.LCPI10_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI10_0) +; RV64-NEXT: flw ft0, %lo(.LCPI10_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f32_to_ui16( %x) { +; RV32-LABEL: trunc_nxv4f32_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI11_0) +; RV64-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f32_to_si32( %x) { +; RV32-LABEL: trunc_nxv4f32_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: flw ft0, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI12_0) +; RV64-NEXT: flw ft0, %lo(.LCPI12_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f32_to_ui32( %x) { +; RV32-LABEL: trunc_nxv4f32_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: flw ft0, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI13_0) +; RV64-NEXT: flw ft0, %lo(.LCPI13_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f32_to_si64( %x) { +; RV32-LABEL: trunc_nxv4f32_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: flw ft0, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI14_0) +; RV64-NEXT: flw ft0, %lo(.LCPI14_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f32_to_ui64( %x) { +; RV32-LABEL: trunc_nxv4f32_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI15_0) +; RV32-NEXT: flw ft0, %lo(.LCPI15_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f32_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI15_0) +; RV64-NEXT: flw ft0, %lo(.LCPI15_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; ceil +; ================================================================================ + +declare @llvm.ceil.nxv1f32() + +define @ceil_nxv1f32_to_si8( %x) { +; RV32-LABEL: ceil_nxv1f32_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI16_0) +; RV32-NEXT: flw ft0, %lo(.LCPI16_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI16_0) +; RV64-NEXT: flw ft0, %lo(.LCPI16_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f32_to_ui8( %x) { +; RV32-LABEL: ceil_nxv1f32_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI17_0) +; RV32-NEXT: flw ft0, %lo(.LCPI17_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI17_0) +; RV64-NEXT: flw ft0, %lo(.LCPI17_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f32_to_si16( %x) { +; RV32-LABEL: ceil_nxv1f32_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI18_0) +; RV32-NEXT: flw ft0, %lo(.LCPI18_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: flw ft0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f32_to_ui16( %x) { +; RV32-LABEL: ceil_nxv1f32_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI19_0) +; RV32-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f32_to_si32( %x) { +; RV32-LABEL: ceil_nxv1f32_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI20_0) +; RV32-NEXT: flw ft0, %lo(.LCPI20_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: flw ft0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f32_to_ui32( %x) { +; RV32-LABEL: ceil_nxv1f32_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI21_0) +; RV32-NEXT: flw ft0, %lo(.LCPI21_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: flw ft0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f32_to_si64( %x) { +; RV32-LABEL: ceil_nxv1f32_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-NEXT: flw ft0, %lo(.LCPI22_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI22_0) +; RV64-NEXT: flw ft0, %lo(.LCPI22_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f32_to_ui64( %x) { +; RV32-LABEL: ceil_nxv1f32_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI23_0) +; RV32-NEXT: flw ft0, %lo(.LCPI23_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f32_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI23_0) +; RV64-NEXT: flw ft0, %lo(.LCPI23_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; ceil +; ================================================================================ + +declare @llvm.ceil.nxv4f32() + +define @ceil_nxv4f32_to_si8( %x) { +; RV32-LABEL: ceil_nxv4f32_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI24_0) +; RV32-NEXT: flw ft0, %lo(.LCPI24_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v10, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI24_0) +; RV64-NEXT: flw ft0, %lo(.LCPI24_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v10, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f32_to_ui8( %x) { +; RV32-LABEL: ceil_nxv4f32_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI25_0) +; RV32-NEXT: flw ft0, %lo(.LCPI25_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v10, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI25_0) +; RV64-NEXT: flw ft0, %lo(.LCPI25_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v10, 0 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f32_to_si16( %x) { +; RV32-LABEL: ceil_nxv4f32_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI26_0) +; RV32-NEXT: flw ft0, %lo(.LCPI26_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI26_0) +; RV64-NEXT: flw ft0, %lo(.LCPI26_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f32_to_ui16( %x) { +; RV32-LABEL: ceil_nxv4f32_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI27_0) +; RV32-NEXT: flw ft0, %lo(.LCPI27_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI27_0) +; RV64-NEXT: flw ft0, %lo(.LCPI27_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f32_to_si32( %x) { +; RV32-LABEL: ceil_nxv4f32_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI28_0) +; RV32-NEXT: flw ft0, %lo(.LCPI28_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI28_0) +; RV64-NEXT: flw ft0, %lo(.LCPI28_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f32_to_ui32( %x) { +; RV32-LABEL: ceil_nxv4f32_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI29_0) +; RV32-NEXT: flw ft0, %lo(.LCPI29_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI29_0) +; RV64-NEXT: flw ft0, %lo(.LCPI29_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f32_to_si64( %x) { +; RV32-LABEL: ceil_nxv4f32_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI30_0) +; RV32-NEXT: flw ft0, %lo(.LCPI30_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI30_0) +; RV64-NEXT: flw ft0, %lo(.LCPI30_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f32_to_ui64( %x) { +; RV32-LABEL: ceil_nxv4f32_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI31_0) +; RV32-NEXT: flw ft0, %lo(.LCPI31_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f32_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI31_0) +; RV64-NEXT: flw ft0, %lo(.LCPI31_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + %b = fptoui %a to + ret %b +} diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll @@ -0,0 +1,1229 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=RV64 + +; ================================================================================ +; trunc +; ================================================================================ + +declare @llvm.trunc.nxv1f16() + +define @trunc_nxv1f16_to_si8( %x) { +; RV32-LABEL: trunc_nxv1f16_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI0_0) +; RV32-NEXT: flh ft0, %lo(.LCPI0_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI0_0) +; RV64-NEXT: flh ft0, %lo(.LCPI0_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f16_to_ui8( %x) { +; RV32-LABEL: trunc_nxv1f16_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI1_0) +; RV32-NEXT: flh ft0, %lo(.LCPI1_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI1_0) +; RV64-NEXT: flh ft0, %lo(.LCPI1_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f16_to_si16( %x) { +; RV32-LABEL: trunc_nxv1f16_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI2_0) +; RV32-NEXT: flh ft0, %lo(.LCPI2_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI2_0) +; RV64-NEXT: flh ft0, %lo(.LCPI2_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f16_to_ui16( %x) { +; RV32-LABEL: trunc_nxv1f16_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI3_0) +; RV32-NEXT: flh ft0, %lo(.LCPI3_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI3_0) +; RV64-NEXT: flh ft0, %lo(.LCPI3_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f16_to_si32( %x) { +; RV32-LABEL: trunc_nxv1f16_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI4_0) +; RV32-NEXT: flh ft0, %lo(.LCPI4_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI4_0) +; RV64-NEXT: flh ft0, %lo(.LCPI4_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f16_to_ui32( %x) { +; RV32-LABEL: trunc_nxv1f16_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI5_0) +; RV32-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI5_0) +; RV64-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv1f16_to_si64( %x) { +; RV32-LABEL: trunc_nxv1f16_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI6_0) +; RV32-NEXT: flh ft0, %lo(.LCPI6_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v9, v8 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI6_0) +; RV64-NEXT: flh ft0, %lo(.LCPI6_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v9, v8 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv1f16_to_ui64( %x) { +; RV32-LABEL: trunc_nxv1f16_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI7_0) +; RV32-NEXT: flh ft0, %lo(.LCPI7_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v9, v8 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv1f16_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI7_0) +; RV64-NEXT: flh ft0, %lo(.LCPI7_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v9, v8 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; trunc +; ================================================================================ + +declare @llvm.trunc.nxv4f16() + +define @trunc_nxv4f16_to_si8( %x) { +; RV32-LABEL: trunc_nxv4f16_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI8_0) +; RV32-NEXT: flh ft0, %lo(.LCPI8_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI8_0) +; RV64-NEXT: flh ft0, %lo(.LCPI8_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f16_to_ui8( %x) { +; RV32-LABEL: trunc_nxv4f16_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI9_0) +; RV32-NEXT: flh ft0, %lo(.LCPI9_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI9_0) +; RV64-NEXT: flh ft0, %lo(.LCPI9_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f16_to_si16( %x) { +; RV32-LABEL: trunc_nxv4f16_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI10_0) +; RV32-NEXT: flh ft0, %lo(.LCPI10_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI10_0) +; RV64-NEXT: flh ft0, %lo(.LCPI10_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f16_to_ui16( %x) { +; RV32-LABEL: trunc_nxv4f16_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI11_0) +; RV32-NEXT: flh ft0, %lo(.LCPI11_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI11_0) +; RV64-NEXT: flh ft0, %lo(.LCPI11_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f16_to_si32( %x) { +; RV32-LABEL: trunc_nxv4f16_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: flh ft0, %lo(.LCPI12_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI12_0) +; RV64-NEXT: flh ft0, %lo(.LCPI12_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f16_to_ui32( %x) { +; RV32-LABEL: trunc_nxv4f16_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: flh ft0, %lo(.LCPI13_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI13_0) +; RV64-NEXT: flh ft0, %lo(.LCPI13_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @trunc_nxv4f16_to_si64( %x) { +; RV32-LABEL: trunc_nxv4f16_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v12, v8 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfwcvt.rtz.x.f.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI14_0) +; RV64-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v12, v8 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfwcvt.rtz.x.f.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @trunc_nxv4f16_to_ui64( %x) { +; RV32-LABEL: trunc_nxv4f16_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI15_0) +; RV32-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v12, v8 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfwcvt.rtz.xu.f.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: trunc_nxv4f16_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI15_0) +; RV64-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v12, v8 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfwcvt.rtz.xu.f.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; ceil +; ================================================================================ + +declare @llvm.ceil.nxv1f16() + +define @ceil_nxv1f16_to_si8( %x) { +; RV32-LABEL: ceil_nxv1f16_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI16_0) +; RV32-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI16_0) +; RV64-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f16_to_ui8( %x) { +; RV32-LABEL: ceil_nxv1f16_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI17_0) +; RV32-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI17_0) +; RV64-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f16_to_si16( %x) { +; RV32-LABEL: ceil_nxv1f16_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI18_0) +; RV32-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f16_to_ui16( %x) { +; RV32-LABEL: ceil_nxv1f16_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI19_0) +; RV32-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f16_to_si32( %x) { +; RV32-LABEL: ceil_nxv1f16_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI20_0) +; RV32-NEXT: flh ft0, %lo(.LCPI20_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: flh ft0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f16_to_ui32( %x) { +; RV32-LABEL: ceil_nxv1f16_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI21_0) +; RV32-NEXT: flh ft0, %lo(.LCPI21_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: flh ft0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv1f16_to_si64( %x) { +; RV32-LABEL: ceil_nxv1f16_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-NEXT: flh ft0, %lo(.LCPI22_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v9, v8 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI22_0) +; RV64-NEXT: flh ft0, %lo(.LCPI22_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v9, v8 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv1f16_to_ui64( %x) { +; RV32-LABEL: ceil_nxv1f16_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI23_0) +; RV32-NEXT: flh ft0, %lo(.LCPI23_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v9, v8 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv1f16_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI23_0) +; RV64-NEXT: flh ft0, %lo(.LCPI23_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v9, v8 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; ceil +; ================================================================================ + +declare @llvm.ceil.nxv4f16() + +define @ceil_nxv4f16_to_si8( %x) { +; RV32-LABEL: ceil_nxv4f16_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI24_0) +; RV32-NEXT: flh ft0, %lo(.LCPI24_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI24_0) +; RV64-NEXT: flh ft0, %lo(.LCPI24_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f16_to_ui8( %x) { +; RV32-LABEL: ceil_nxv4f16_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI25_0) +; RV32-NEXT: flh ft0, %lo(.LCPI25_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI25_0) +; RV64-NEXT: flh ft0, %lo(.LCPI25_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f16_to_si16( %x) { +; RV32-LABEL: ceil_nxv4f16_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI26_0) +; RV32-NEXT: flh ft0, %lo(.LCPI26_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI26_0) +; RV64-NEXT: flh ft0, %lo(.LCPI26_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f16_to_ui16( %x) { +; RV32-LABEL: ceil_nxv4f16_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI27_0) +; RV32-NEXT: flh ft0, %lo(.LCPI27_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI27_0) +; RV64-NEXT: flh ft0, %lo(.LCPI27_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f16_to_si32( %x) { +; RV32-LABEL: ceil_nxv4f16_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI28_0) +; RV32-NEXT: flh ft0, %lo(.LCPI28_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI28_0) +; RV64-NEXT: flh ft0, %lo(.LCPI28_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f16_to_ui32( %x) { +; RV32-LABEL: ceil_nxv4f16_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI29_0) +; RV32-NEXT: flh ft0, %lo(.LCPI29_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI29_0) +; RV64-NEXT: flh ft0, %lo(.LCPI29_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @ceil_nxv4f16_to_si64( %x) { +; RV32-LABEL: ceil_nxv4f16_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI30_0) +; RV32-NEXT: flh ft0, %lo(.LCPI30_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v12, v8 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfwcvt.rtz.x.f.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI30_0) +; RV64-NEXT: flh ft0, %lo(.LCPI30_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v12, v8 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfwcvt.rtz.x.f.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @ceil_nxv4f16_to_ui64( %x) { +; RV32-LABEL: ceil_nxv4f16_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI31_0) +; RV32-NEXT: flh ft0, %lo(.LCPI31_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: fsrm a0 +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v12, v8 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfwcvt.rtz.xu.f.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: ceil_nxv4f16_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI31_0) +; RV64-NEXT: flh ft0, %lo(.LCPI31_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: fsrm a0 +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v12, v8 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfwcvt.rtz.xu.f.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + %b = fptoui %a to + ret %b +}