diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -664,6 +664,10 @@
       setOperationAction(ISD::FMINNUM, VT, Legal);
       setOperationAction(ISD::FMAXNUM, VT, Legal);
 
+      setOperationAction(ISD::FTRUNC, VT, Custom);
+      setOperationAction(ISD::FCEIL, VT, Custom);
+      setOperationAction(ISD::FFLOOR, VT, Custom);
+
       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -899,6 +903,10 @@
         setOperationAction(ISD::FP_ROUND, VT, Custom);
         setOperationAction(ISD::FP_EXTEND, VT, Custom);
 
+        setOperationAction(ISD::FTRUNC, VT, Custom);
+        setOperationAction(ISD::FCEIL, VT, Custom);
+        setOperationAction(ISD::FFLOOR, VT, Custom);
+
         for (auto CC : VFPCCToExpand)
           setCondCodeAction(CC, VT, Expand);
 
@@ -1599,6 +1607,65 @@
   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
 }
 
+// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
+// and back, taking care to avoid converting values that are NaN or already
+// integers.
+// TODO: Floor and ceil could be shorter by changing the rounding mode, but we
+// don't have FRM dependencies modeled yet.
+static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getSimpleValueType();
+  assert(VT.isVector() && "Unexpected type");
+
+  SDLoc DL(Op);
+
+  SDValue Src = Op.getOperand(0);
+
+  // Truncate to integer and convert back to FP.
+  MVT IntVT = VT.changeVectorElementTypeToInteger();
+  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
+  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
+
+  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+
+  if (Op.getOpcode() == ISD::FCEIL) {
+    // If the truncated value is greater than or equal to the original value,
+    // we've computed the ceil. Otherwise, we went the wrong way and need to
+    // increase by 1.
+    // FIXME: This should use a masked operation.
+    SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
+                                 DAG.getConstantFP(1.0, DL, VT));
+    SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
+    Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
+  } else if (Op.getOpcode() == ISD::FFLOOR) {
+    // If the truncated value is less than or equal to the original value,
+    // we've computed the floor. Otherwise, we went the wrong way and need to
+    // decrease by 1.
+    // FIXME: This should use a masked operation.
+    SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
+                                 DAG.getConstantFP(1.0, DL, VT));
+    SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
+    Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
+  }
+
+  // Restore the original sign so that -0.0 is preserved.
+  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
+
+  // Determine the largest integer that can be represented exactly. This and
+  // values larger than it don't have any fractional bits so don't need to
+  // be converted.
+  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+  unsigned Precision = APFloat::semanticsPrecision(FltSem);
+  APFloat MaxVal = APFloat(FltSem);
+  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
+                          /*IsSigned*/false, APFloat::rmNearestTiesToEven);
+  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
+
+  // If abs(Src) was larger than MaxVal or NaN, keep it.
+  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
+  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
+  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
+}
+
 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -2775,6 +2842,10 @@
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
     return lowerFP_TO_INT_SAT(Op, DAG);
+  case ISD::FTRUNC:
+  case ISD::FCEIL:
+  case ISD::FFLOOR:
+    return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
   case ISD::VECREDUCE_ADD:
   case ISD::VECREDUCE_UMAX:
   case ISD::VECREDUCE_SMAX:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
+; CHECK-LABEL: ceil_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9
+; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI0_0)(a0)
+; CHECK-NEXT:    vmflt.vv v0, v9, v8
+; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
+; CHECK-NEXT:    flh ft1, %lo(.LCPI0_1)(a0)
+; CHECK-NEXT:    vfadd.vf v10, v9, ft0
+; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
+; CHECK-NEXT:    vfsgnjx.vv v10, v8, v8
+; CHECK-NEXT:    vmflt.vf v0, v10, ft1
+; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
+  ret <vscale x 1 x half> %a
+}
+declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
+
+define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
+; CHECK-LABEL: ceil_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9
+; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI1_0)(a0)
+; CHECK-NEXT:    vmflt.vv v0, v9, v8
+; CHECK-NEXT:    lui a0, %hi(.LCPI1_1)
+; CHECK-NEXT:    flh ft1, %lo(.LCPI1_1)(a0)
+; CHECK-NEXT:    vfadd.vf v10, v9, ft0
+; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
+; CHECK-NEXT:    vfsgnjx.vv v10, v8, v8
+; CHECK-NEXT:    vmflt.vf v0, v10, ft1
+; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
+  ret <vscale x 2 x half> %a
+}
+declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)
+
+define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
+; CHECK-LABEL: ceil_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9
+; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI2_0)(a0)
+; CHECK-NEXT:    vmflt.vv v0, v9, v8
+; CHECK-NEXT:    lui a0, %hi(.LCPI2_1)
+; CHECK-NEXT:    flh ft1, %lo(.LCPI2_1)(a0)
+; CHECK-NEXT:    vfadd.vf v10, v9, ft0
+; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
+; CHECK-NEXT:    vfsgnjx.vv v10, v8, v8
+;
CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv4f16( %x) + ret %a +} +declare @llvm.ceil.nxv4f16() + +define @ceil_nxv8f16( %x) { +; CHECK-LABEL: ceil_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI3_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI3_1)(a0) +; CHECK-NEXT: vfadd.vf v12, v10, ft0 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft1 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv8f16( %x) + ret %a +} +declare @llvm.ceil.nxv8f16() + +define @ceil_nxv16f16( %x) { +; CHECK-LABEL: ceil_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI4_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI4_1)(a0) +; CHECK-NEXT: vfadd.vf v16, v12, ft0 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft1 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv16f16( %x) + ret %a +} +declare @llvm.ceil.nxv16f16() + +define @ceil_nxv32f16( %x) { +; CHECK-LABEL: ceil_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI5_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI5_1)(a0) +; CHECK-NEXT: vfadd.vf v24, v16, ft0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfsgnjx.vv v24, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v24, ft1 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv32f16( %x) + ret %a +} +declare @llvm.ceil.nxv32f16() + +define @ceil_nxv1f32( %x) { +; CHECK-LABEL: ceil_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI6_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI6_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI6_1)(a0) +; CHECK-NEXT: vfadd.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv1f32( %x) + ret %a +} +declare @llvm.ceil.nxv1f32() + +define @ceil_nxv2f32( %x) { +; CHECK-LABEL: ceil_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI7_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI7_1)(a0) +; CHECK-NEXT: vfadd.vf 
v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv2f32( %x) + ret %a +} +declare @llvm.ceil.nxv2f32() + +define @ceil_nxv4f32( %x) { +; CHECK-LABEL: ceil_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI8_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI8_1)(a0) +; CHECK-NEXT: vfadd.vf v12, v10, ft0 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft1 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv4f32( %x) + ret %a +} +declare @llvm.ceil.nxv4f32() + +define @ceil_nxv8f32( %x) { +; CHECK-LABEL: ceil_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI9_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI9_1)(a0) +; CHECK-NEXT: vfadd.vf v16, v12, ft0 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft1 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv8f32( %x) + ret %a +} +declare @llvm.ceil.nxv8f32() + +define @ceil_nxv16f32( %x) { +; CHECK-LABEL: ceil_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI10_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI10_1)(a0) +; CHECK-NEXT: vfadd.vf v24, v16, ft0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfsgnjx.vv v24, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v24, ft1 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv16f32( %x) + ret %a +} +declare @llvm.ceil.nxv16f32() + +define @ceil_nxv1f64( %x) { +; CHECK-LABEL: ceil_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI11_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI11_1)(a0) +; CHECK-NEXT: vfadd.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv1f64( %x) + ret %a +} +declare @llvm.ceil.nxv1f64() + +define @ceil_nxv2f64( %x) { +; CHECK-LABEL: ceil_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v10, v8 
+; CHECK-NEXT: lui a0, %hi(.LCPI12_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI12_1)(a0) +; CHECK-NEXT: vfadd.vf v12, v10, ft0 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft1 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv2f64( %x) + ret %a +} +declare @llvm.ceil.nxv2f64() + +define @ceil_nxv4f64( %x) { +; CHECK-LABEL: ceil_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI13_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI13_1)(a0) +; CHECK-NEXT: vfadd.vf v16, v12, ft0 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft1 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv4f64( %x) + ret %a +} +declare @llvm.ceil.nxv4f64() + +define @ceil_nxv8f64( %x) { +; CHECK-LABEL: ceil_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: lui a0, %hi(.LCPI14_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI14_1)(a0) +; CHECK-NEXT: vfadd.vf v24, v16, ft0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfsgnjx.vv v24, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v24, ft1 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.ceil.nxv8f64( %x) + ret %a +} +declare @llvm.ceil.nxv8f64() diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -0,0 +1,350 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define @floor_nxv1f16( %x) { +; CHECK-LABEL: floor_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI0_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI0_1)(a0) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv1f16( %x) + ret %a +} +declare @llvm.floor.nxv1f16() + +define @floor_nxv2f16( %x) { +; CHECK-LABEL: floor_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI1_1) +; 
CHECK-NEXT: flh ft1, %lo(.LCPI1_1)(a0) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv2f16( %x) + ret %a +} +declare @llvm.floor.nxv2f16() + +define @floor_nxv4f16( %x) { +; CHECK-LABEL: floor_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI2_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI2_1)(a0) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv4f16( %x) + ret %a +} +declare @llvm.floor.nxv4f16() + +define @floor_nxv8f16( %x) { +; CHECK-LABEL: floor_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI3_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI3_1)(a0) +; CHECK-NEXT: vfsub.vf v12, v10, ft0 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft1 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv8f16( %x) + ret %a +} +declare @llvm.floor.nxv8f16() + +define @floor_nxv16f16( %x) { +; CHECK-LABEL: floor_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI4_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI4_1)(a0) +; CHECK-NEXT: vfsub.vf v16, v12, ft0 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft1 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv16f16( %x) + ret %a +} +declare @llvm.floor.nxv16f16() + +define @floor_nxv32f16( %x) { +; CHECK-LABEL: floor_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI5_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI5_1)(a0) +; CHECK-NEXT: vfsub.vf v24, v16, ft0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfsgnjx.vv v24, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v24, ft1 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv32f16( %x) + ret %a +} +declare @llvm.floor.nxv32f16() + +define @floor_nxv1f32( %x) { +; CHECK-LABEL: floor_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI6_0) +; 
CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI6_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI6_1)(a0) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv1f32( %x) + ret %a +} +declare @llvm.floor.nxv1f32() + +define @floor_nxv2f32( %x) { +; CHECK-LABEL: floor_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI7_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI7_1)(a0) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv2f32( %x) + ret %a +} +declare @llvm.floor.nxv2f32() + +define @floor_nxv4f32( %x) { +; CHECK-LABEL: floor_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI8_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI8_1)(a0) +; CHECK-NEXT: vfsub.vf v12, v10, ft0 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft1 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv4f32( %x) + ret %a +} +declare @llvm.floor.nxv4f32() + +define @floor_nxv8f32( %x) { +; CHECK-LABEL: floor_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI9_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI9_1)(a0) +; CHECK-NEXT: vfsub.vf v16, v12, ft0 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft1 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv8f32( %x) + ret %a +} +declare @llvm.floor.nxv8f32() + +define @floor_nxv16f32( %x) { +; CHECK-LABEL: floor_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI10_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI10_1)(a0) +; CHECK-NEXT: vfsub.vf v24, v16, ft0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfsgnjx.vv v24, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v24, ft1 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv16f32( %x) + ret %a +} +declare @llvm.floor.nxv16f32() + +define @floor_nxv1f64( %x) { +; CHECK-LABEL: floor_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, 
mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a0, %hi(.LCPI11_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI11_1)(a0) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv1f64( %x) + ret %a +} +declare @llvm.floor.nxv1f64() + +define @floor_nxv2f64( %x) { +; CHECK-LABEL: floor_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: lui a0, %hi(.LCPI12_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI12_1)(a0) +; CHECK-NEXT: vfsub.vf v12, v10, ft0 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft1 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv2f64( %x) + ret %a +} +declare @llvm.floor.nxv2f64() + +define @floor_nxv4f64( %x) { +; CHECK-LABEL: floor_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: lui a0, %hi(.LCPI13_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI13_1)(a0) +; CHECK-NEXT: vfsub.vf v16, v12, ft0 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft1 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv4f64( %x) + ret %a +} +declare @llvm.floor.nxv4f64() + +define @floor_nxv8f64( %x) { +; CHECK-LABEL: floor_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: lui a0, %hi(.LCPI14_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI14_1)(a0) +; CHECK-NEXT: vfsub.vf v24, v16, ft0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfsgnjx.vv v24, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v24, ft1 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.floor.nxv8f64( %x) + ret %a +} +declare @llvm.floor.nxv8f64() diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1963,3 +1963,231 @@ store <2 x double> %e, <2 x double>* %x ret void } + +define void @trunc_v8f16(<8 x half>* %x) { +; CHECK-LABEL: trunc_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI91_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI91_0)(a1) +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a) + store <8 x half> %b, <8 x half>* %x + ret void +} +declare <8 x half> @llvm.trunc.v8f16(<8 x half>) + +define void @trunc_v4f32(<4 x float>* %x) { +; CHECK-LABEL: trunc_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI92_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI92_0)(a1) +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) + store <4 x float> %b, <4 x float>* %x + ret void +} +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) + +define void @trunc_v2f64(<2 x double>* %x) { +; CHECK-LABEL: trunc_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI93_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI93_0)(a1) +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) + store <2 x double> %b, <2 x double>* %x + ret void +} +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) + +define void @ceil_v8f16(<8 x half>* %x) { +; CHECK-LABEL: ceil_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI94_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI94_0)(a1) +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: lui a1, %hi(.LCPI94_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI94_1)(a1) +; CHECK-NEXT: vfadd.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a) + store <8 x half> %b, <8 x half>* %x + ret void +} +declare <8 x half> @llvm.ceil.v8f16(<8 x half>) + +define void @ceil_v4f32(<4 x float>* %x) { +; CHECK-LABEL: ceil_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI95_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI95_0)(a1) +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: lui a1, %hi(.LCPI95_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI95_1)(a1) +; CHECK-NEXT: vfadd.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) + store <4 x float> %b, <4 x float>* %x + 
ret void +} +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) + +define void @ceil_v2f64(<2 x double>* %x) { +; CHECK-LABEL: ceil_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI96_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI96_0)(a1) +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: lui a1, %hi(.LCPI96_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI96_1)(a1) +; CHECK-NEXT: vfadd.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) + store <2 x double> %b, <2 x double>* %x + ret void +} +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) + +define void @floor_v8f16(<8 x half>* %x) { +; CHECK-LABEL: floor_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI97_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI97_0)(a1) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI97_1) +; CHECK-NEXT: flh ft1, %lo(.LCPI97_1)(a1) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a) + store <8 x half> %b, <8 x half>* %x + ret void +} +declare <8 x half> @llvm.floor.v8f16(<8 x half>) + +define void @floor_v4f32(<4 x float>* %x) { +; CHECK-LABEL: floor_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI98_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI98_0)(a1) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI98_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI98_1)(a1) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) + store <4 x float> %b, <4 x float>* %x + ret void +} +declare <4 x float> @llvm.floor.v4f32(<4 x float>) + +define void @floor_v2f64(<2 x double>* %x) { +; CHECK-LABEL: floor_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI99_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI99_0)(a1) +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: lui a1, %hi(.LCPI99_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI99_1)(a1) +; CHECK-NEXT: vfsub.vf v10, v9, ft0 +; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft1 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vse64.v 
v8, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) + store <2 x double> %b, <2 x double>* %x + ret void +} +declare <2 x double> @llvm.floor.v2f64(<2 x double>) diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll @@ -0,0 +1,275 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define @trunc_nxv1f16( %x) { +; CHECK-LABEL: trunc_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv1f16( %x) + ret %a +} +declare @llvm.trunc.nxv1f16() + +define @trunc_nxv2f16( %x) { +; CHECK-LABEL: trunc_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv2f16( %x) + ret %a +} +declare @llvm.trunc.nxv2f16() + +define @trunc_nxv4f16( %x) { +; CHECK-LABEL: trunc_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv4f16( %x) + ret %a +} +declare @llvm.trunc.nxv4f16() + +define @trunc_nxv8f16( %x) { +; CHECK-LABEL: trunc_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv8f16( %x) + ret %a +} +declare @llvm.trunc.nxv8f16() + +define @trunc_nxv16f16( %x) { +; CHECK-LABEL: trunc_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv16f16( %x) + ret %a +} +declare @llvm.trunc.nxv16f16() + +define @trunc_nxv32f16( %x) { +; CHECK-LABEL: 
trunc_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv32f16( %x) + ret %a +} +declare @llvm.trunc.nxv32f16() + +define @trunc_nxv1f32( %x) { +; CHECK-LABEL: trunc_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI6_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv1f32( %x) + ret %a +} +declare @llvm.trunc.nxv1f32() + +define @trunc_nxv2f32( %x) { +; CHECK-LABEL: trunc_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv2f32( %x) + ret %a +} +declare @llvm.trunc.nxv2f32() + +define @trunc_nxv4f32( %x) { +; CHECK-LABEL: trunc_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv4f32( %x) + ret %a +} +declare @llvm.trunc.nxv4f32() + +define @trunc_nxv8f32( %x) { +; CHECK-LABEL: trunc_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv8f32( %x) + ret %a +} +declare @llvm.trunc.nxv8f32() + +define @trunc_nxv16f32( %x) { +; CHECK-LABEL: trunc_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv16f32( %x) + ret %a +} +declare @llvm.trunc.nxv16f32() + +define @trunc_nxv1f64( %x) { +; CHECK-LABEL: trunc_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vv v9, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v9, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v9, v9 +; CHECK-NEXT: 
vfsgnj.vv v9, v9, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv1f64( %x) + ret %a +} +declare @llvm.trunc.nxv1f64() + +define @trunc_nxv2f64( %x) { +; CHECK-LABEL: trunc_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v10, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v10, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vfcvt.f.x.v v10, v10 +; CHECK-NEXT: vfsgnj.vv v10, v10, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv2f64( %x) + ret %a +} +declare @llvm.trunc.nxv2f64() + +define @trunc_nxv4f64( %x) { +; CHECK-LABEL: trunc_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v12, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v12, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vfcvt.f.x.v v12, v12 +; CHECK-NEXT: vfsgnj.vv v12, v12, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv4f64( %x) + ret %a +} +declare @llvm.trunc.nxv4f64() + +define @trunc_nxv8f64( %x) { +; CHECK-LABEL: trunc_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vv v16, v8, v8 +; CHECK-NEXT: vmflt.vf v0, v16, ft0 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK-NEXT: vfcvt.f.x.v v16, v16 +; CHECK-NEXT: vfsgnj.vv v16, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %a = call @llvm.trunc.nxv8f64( %x) + ret %a +} +declare @llvm.trunc.nxv8f64()