diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9433,8 +9433,8 @@
            "STRICT_FP_ROUND result type should be vector iff the operand "
            "type is vector!");
     assert((!VTList.VTs[0].isVector() ||
-            VTList.VTs[0].getVectorNumElements() ==
-                Ops[1].getValueType().getVectorNumElements()) &&
+            VTList.VTs[0].getVectorElementCount() ==
+                Ops[1].getValueType().getVectorElementCount()) &&
            "Vector element count mismatch!");
     assert(VTList.VTs[0].isFloatingPoint() &&
            Ops[1].getValueType().isFloatingPoint() &&
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -339,7 +339,9 @@
   STRICT_VFNMADD_VL,
   STRICT_VFMSUB_VL,
   STRICT_VFNMSUB_VL,
+  STRICT_FP_ROUND_VL,
   STRICT_FP_EXTEND_VL,
+  STRICT_VFNCVT_ROD_VL,
 
   // WARNING: Do not add anything in the end unless you want the node to
   // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -777,7 +779,7 @@
   SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
 
-  SDValue lowerStrictFPExtend(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -804,7 +804,8 @@
 
       setOperationAction(FloatingPointVPOps, VT, Custom);
 
-      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
+      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
+                         Custom);
       setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                           ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                          VT, Legal);
@@ -1021,7 +1022,8 @@
 
         setOperationAction(FloatingPointVPOps, VT, Custom);
 
-        setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
+        setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
+                           Custom);
         setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB,
                             ISD::STRICT_FMUL, ISD::STRICT_FDIV,
                             ISD::STRICT_FSQRT, ISD::STRICT_FMA},
@@ -4150,8 +4152,9 @@
     if (!Op.getValueType().isVector())
       return Op;
     return lowerVectorFPExtendOrRoundLike(Op, DAG);
+  case ISD::STRICT_FP_ROUND:
   case ISD::STRICT_FP_EXTEND:
-    return lowerStrictFPExtend(Op, DAG);
+    return lowerStrictFPExtendOrRoundLike(Op, DAG);
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::SINT_TO_FP:
@@ -5388,14 +5391,14 @@
   return Result;
 }
 
-SDValue RISCVTargetLowering::lowerStrictFPExtend(SDValue Op,
-                                                 SelectionDAG &DAG) const {
+SDValue
+RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
+                                                    SelectionDAG &DAG) const {
   SDLoc DL(Op);
   SDValue Chain = Op.getOperand(0);
   SDValue Src = Op.getOperand(1);
   MVT VT = Op.getSimpleValueType();
   MVT SrcVT = Src.getSimpleValueType();
-
   MVT ContainerVT = VT;
   if (VT.isFixedLengthVector()) {
     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
@@ -5406,19 +5409,26 @@
 
   auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
 
-  // RVV can only widen fp to types double the size as the source, so it needs
-  // two vfwcvt to achieve extending fp16 to fp64.
-  if (VT.getVectorElementType() == MVT::f64 &&
-      SrcVT.getVectorElementType() == MVT::f16) {
+  // RVV can only widen/truncate fp to types double/half the size as the source.
+  if ((VT.getVectorElementType() == MVT::f64 &&
+       SrcVT.getVectorElementType() == MVT::f16) ||
+      (VT.getVectorElementType() == MVT::f16 &&
+       SrcVT.getVectorElementType() == MVT::f64)) {
+    // For double rounding, the intermediate rounding should be round-to-odd.
+    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
+                                ? RISCVISD::STRICT_FP_EXTEND_VL
+                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
-    Src = DAG.getNode(RISCVISD::STRICT_FP_EXTEND_VL, DL,
-                      DAG.getVTList(InterVT, MVT::Other), Chain, Src, Mask, VL);
+    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
+                      Chain, Src, Mask, VL);
     Chain = Src.getValue(1);
   }
 
-  SDValue Res =
-      DAG.getNode(RISCVISD::STRICT_FP_EXTEND_VL, DL,
-                  DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, Mask, VL);
+  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
+                         ? RISCVISD::STRICT_FP_EXTEND_VL
+                         : RISCVISD::STRICT_FP_ROUND_VL;
+  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
+                            Chain, Src, Mask, VL);
   if (VT.isFixedLengthVector()) {
     // StrictFP operations have two result values. Their lowered result should
     // have same result count.
@@ -14129,7 +14139,9 @@
     NODE_NAME_CASE(STRICT_VFNMADD_VL)
     NODE_NAME_CASE(STRICT_VFMSUB_VL)
     NODE_NAME_CASE(STRICT_VFNMSUB_VL)
+    NODE_NAME_CASE(STRICT_FP_ROUND_VL)
     NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
+    NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
    NODE_NAME_CASE(VWMUL_VL)
    NODE_NAME_CASE(VWMULU_VL)
    NODE_NAME_CASE(VWMULSU_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -169,13 +169,21 @@
 ]>;
 
 def riscv_fpround_vl : SDNode<"RISCVISD::FP_ROUND_VL", SDT_RISCVFPRoundOp_VL>;
+def riscv_strict_fpround_vl : SDNode<"RISCVISD::STRICT_FP_ROUND_VL", SDT_RISCVFPRoundOp_VL, [SDNPHasChain]>;
 def riscv_fpextend_vl : SDNode<"RISCVISD::FP_EXTEND_VL", SDT_RISCVFPExtendOp_VL>;
 def riscv_strict_fpextend_vl : SDNode<"RISCVISD::STRICT_FP_EXTEND_VL", SDT_RISCVFPExtendOp_VL, [SDNPHasChain]>;
 def riscv_fncvt_rod_vl : SDNode<"RISCVISD::VFNCVT_ROD_VL", SDT_RISCVFPRoundOp_VL>;
+def riscv_strict_fncvt_rod_vl : SDNode<"RISCVISD::STRICT_VFNCVT_ROD_VL", SDT_RISCVFPRoundOp_VL, [SDNPHasChain]>;
 
+def any_riscv_fpround_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+                                    [(riscv_fpround_vl node:$src, node:$mask, node:$vl),
+                                     (riscv_strict_fpround_vl node:$src, node:$mask, node:$vl)]>;
 def any_riscv_fpextend_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
                                      [(riscv_fpextend_vl node:$src, node:$mask, node:$vl),
                                       (riscv_strict_fpextend_vl node:$src, node:$mask, node:$vl)]>;
+def any_riscv_fncvt_rod_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+                                      [(riscv_fncvt_rod_vl node:$src, node:$mask, node:$vl),
+                                       (riscv_strict_fncvt_rod_vl node:$src, node:$mask, node:$vl)]>;
 
 def SDT_RISCVFP2IOp_VL : SDTypeProfile<1, 3, [
   SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
@@ -2048,16 +2056,16 @@
 foreach fvtiToFWti = AllWidenableFloatVectors in {
   defvar fvti = fvtiToFWti.Vti;
   defvar fwti = fvtiToFWti.Wti;
-  def : Pat<(fvti.Vector (riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1),
-                                           (fwti.Mask V0),
-                                           VLOpFrag)),
+  def : Pat<(fvti.Vector (any_riscv_fpround_vl
+                             (fwti.Vector fwti.RegClass:$rs1),
+                             (fwti.Mask V0), VLOpFrag)),
             (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_MASK")
                 (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
                 (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>;
 
-  def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1),
-                                             (fwti.Mask V0),
-                                             VLOpFrag)),
+  def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl
+                             (fwti.Vector fwti.RegClass:$rs1),
+                             (fwti.Mask V0), VLOpFrag)),
             (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK")
                 (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
                 (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+define <2 x float> @vfptrunc_v2f64_v2f32(<2 x double> %va) {
+; CHECK-LABEL: vfptrunc_v2f64_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <2 x float> %evec
+}
+
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
+define <2 x half> @vfptrunc_v2f64_v2f16(<2 x double> %va) {
+; CHECK-LABEL: vfptrunc_v2f64_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <2 x half> %evec
+}
+
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
+define <2 x half> @vfptrunc_v2f32_v2f16(<2 x float> %va) {
+; CHECK-LABEL: vfptrunc_v2f32_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <2 x half> %evec
+}
+
+declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
+define <4 x float> @vfptrunc_v4f64_v4f32(<4 x double> %va) {
+; CHECK-LABEL: vfptrunc_v4f64_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <4 x float> %evec
+}
+
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(<4 x double>, metadata, metadata)
+define <4 x half> @vfptrunc_v4f64_v4f16(<4 x double> %va) {
+; CHECK-LABEL: vfptrunc_v4f64_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    ret
+  %evec = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(<4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <4 x half> %evec
+}
+
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata)
+define <4 x half> @vfptrunc_v4f32_v4f16(<4 x float> %va) {
+; CHECK-LABEL: vfptrunc_v4f32_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <4 x half> %evec
+}
+
+declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)
+define <8 x float> @vfptrunc_v8f64_v8f32(<8 x double> %va) {
+; CHECK-LABEL: vfptrunc_v8f64_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v12, v8
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %evec = call <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <8 x float> %evec
+}
+
+declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(<8 x double>, metadata, metadata)
+define <8 x half> @vfptrunc_v8f64_v8f16(<8 x double> %va) {
+; CHECK-LABEL: vfptrunc_v8f64_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    ret
+  %evec = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(<8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <8 x half> %evec
+}
+
+declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(<8 x float>, metadata, metadata)
+define <8 x half> @vfptrunc_v8f32_v8f16(<8 x float> %va) {
+; CHECK-LABEL: vfptrunc_v8f32_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(<8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <8 x half> %evec
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.experimental.constrained.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double>, metadata, metadata)
+define <vscale x 1 x float> @vfptrunc_nxv1f64_nxv1f32(<vscale x 1 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv1f64_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 1 x float> @llvm.experimental.constrained.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 1 x float> %evec
+}
+
+declare <vscale x 1 x half> @llvm.experimental.constrained.fptrunc.nxv1f16.nxv1f64(<vscale x 1 x double>, metadata, metadata)
+define <vscale x 1 x half> @vfptrunc_nxv1f64_nxv1f16(<vscale x 1 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv1f64_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 1 x half> @llvm.experimental.constrained.fptrunc.nxv1f16.nxv1f64(<vscale x 1 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 1 x half> %evec
+}
+
+declare <vscale x 1 x half> @llvm.experimental.constrained.fptrunc.nxv1f16.nxv1f32(<vscale x 1 x float>, metadata, metadata)
+define <vscale x 1 x half> @vfptrunc_nxv1f32_nxv1f16(<vscale x 1 x float> %va) {
+; CHECK-LABEL: vfptrunc_nxv1f32_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 1 x half> @llvm.experimental.constrained.fptrunc.nxv1f16.nxv1f32(<vscale x 1 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 1 x half> %evec
+}
+
+declare <vscale x 2 x float> @llvm.experimental.constrained.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, metadata, metadata)
+define <vscale x 2 x float> @vfptrunc_nxv2f64_nxv2f32(<vscale x 2 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv2f64_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 2 x float> @llvm.experimental.constrained.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 2 x float> %evec
+}
+
+declare <vscale x 2 x half> @llvm.experimental.constrained.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double>, metadata, metadata)
+define <vscale x 2 x half> @vfptrunc_nxv2f64_nxv2f16(<vscale x 2 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv2f64_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 2 x half> @llvm.experimental.constrained.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 2 x half> %evec
+}
+
+declare <vscale x 2 x half> @llvm.experimental.constrained.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, metadata, metadata)
+define <vscale x 2 x half> @vfptrunc_nxv2f32_nxv2f16(<vscale x 2 x float> %va) {
+; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 2 x half> @llvm.experimental.constrained.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 2 x half> %evec
+}
+
+declare <vscale x 4 x float> @llvm.experimental.constrained.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double>, metadata, metadata)
+define <vscale x 4 x float> @vfptrunc_nxv4f64_nxv4f32(<vscale x 4 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv4f64_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v12, v8
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 4 x float> @llvm.experimental.constrained.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 4 x float> %evec
+}
+
+declare <vscale x 4 x half> @llvm.experimental.constrained.fptrunc.nxv4f16.nxv4f64(<vscale x 4 x double>, metadata, metadata)
+define <vscale x 4 x half> @vfptrunc_nxv4f64_nxv4f16(<vscale x 4 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv4f64_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 4 x half> @llvm.experimental.constrained.fptrunc.nxv4f16.nxv4f64(<vscale x 4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 4 x half> %evec
+}
+
+declare <vscale x 4 x half> @llvm.experimental.constrained.fptrunc.nxv4f16.nxv4f32(<vscale x 4 x float>, metadata, metadata)
+define <vscale x 4 x half> @vfptrunc_nxv4f32_nxv4f16(<vscale x 4 x float> %va) {
+; CHECK-LABEL: vfptrunc_nxv4f32_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 4 x half> @llvm.experimental.constrained.fptrunc.nxv4f16.nxv4f32(<vscale x 4 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 4 x half> %evec
+}
+
+declare <vscale x 8 x float> @llvm.experimental.constrained.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double>, metadata, metadata)
+define <vscale x 8 x float> @vfptrunc_nxv8f64_nxv8f32(<vscale x 8 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv8f64_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v16, v8
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 8 x float> @llvm.experimental.constrained.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 8 x float> %evec
+}
+
+declare <vscale x 8 x half> @llvm.experimental.constrained.fptrunc.nxv8f16.nxv8f64(<vscale x 8 x double>, metadata, metadata)
+define <vscale x 8 x half> @vfptrunc_nxv8f64_nxv8f16(<vscale x 8 x double> %va) {
+; CHECK-LABEL: vfptrunc_nxv8f64_nxv8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 8 x half> @llvm.experimental.constrained.fptrunc.nxv8f16.nxv8f64(<vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 8 x half> %evec
+}
+
+declare <vscale x 8 x half> @llvm.experimental.constrained.fptrunc.nxv8f16.nxv8f32(<vscale x 8 x float>, metadata, metadata)
+define <vscale x 8 x half> @vfptrunc_nxv8f32_nxv8f16(<vscale x 8 x float> %va) {
+; CHECK-LABEL: vfptrunc_nxv8f32_nxv8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v12, v8
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %evec = call <vscale x 8 x half> @llvm.experimental.constrained.fptrunc.nxv8f16.nxv8f32(<vscale x 8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret <vscale x 8 x half> %evec
+}