diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1877,8 +1877,9 @@

   // If abs(Src) was larger than MaxVal or nan, keep it.
   MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-  SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
-                             DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
+  SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
+                             {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
+                              DAG.getUNDEF(SetccVT), TrueMask, VL});

   // Truncate to integer and convert back to FP.
   MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
@@ -1896,9 +1897,9 @@
         DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
     SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                 DAG.getUNDEF(ContainerVT), SplatVal, VL);
-    SDValue NeedAdjust =
-        DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Truncated, Src,
-                    DAG.getCondCode(ISD::SETOLT), Mask, VL);
+    SDValue NeedAdjust = DAG.getNode(
+        RISCVISD::SETCC_VL, DL, SetccVT,
+        {Truncated, Src, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
     Truncated = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Truncated,
                             Splat, Truncated, NeedAdjust, VL);
   } else if (Op.getOpcode() == ISD::FFLOOR) {
@@ -1910,9 +1911,9 @@
         DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
     SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
                                 DAG.getUNDEF(ContainerVT), SplatVal, VL);
-    SDValue NeedAdjust =
-        DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Src, Truncated,
-                    DAG.getCondCode(ISD::SETOLT), Mask, VL);
+    SDValue NeedAdjust = DAG.getNode(
+        RISCVISD::SETCC_VL, DL, SetccVT,
+        {Src, Truncated, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
     Truncated = DAG.getNode(RISCVISD::FSUB_VL, DL, ContainerVT, Truncated,
                             Splat, Truncated, NeedAdjust, VL);
   }
@@ -1973,8 +1974,9 @@

   // If abs(Src) was larger than MaxVal or nan, keep it.
   MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-  SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
-                             DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
+  SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
+                             {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
+                              DAG.getUNDEF(SetccVT), TrueMask, VL});

   bool Ignored;
   APFloat Point5Pred = APFloat(0.5f);
@@ -3757,7 +3759,7 @@
   case ISD::VP_SETCC:
     if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
       return lowerVPSetCCMaskOp(Op, DAG);
-    return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
+    return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
   case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
     return lowerVPStridedLoad(Op, DAG);
   case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
@@ -4412,8 +4414,9 @@
   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
   SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                               DAG.getUNDEF(ContainerVT), Mask, VL);
-  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
-                      DAG.getCondCode(ISD::SETNE), Mask, VL);
+  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
+                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
+                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
   if (MaskVT.isFixedLengthVector())
     Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
   return Trunc;
@@ -5036,8 +5039,9 @@
   SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
   SDValue SelectCond =
-      DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
-                  DAG.getCondCode(ISD::SETEQ), Mask, VL);
+      DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
+                  {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
+                   DAG.getUNDEF(MaskVT), Mask, VL});
   return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                      Vec, VL);
 }
@@ -6127,8 +6131,9 @@
   MVT MaskVT = getMaskTypeFor(ContainerVT);
   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);

-  SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
-                            Op.getOperand(2), Mask, VL);
+  SDValue Cmp =
+      DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
+                  {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});

   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
 }
@@ -6529,8 +6534,9 @@
     SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
     SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
                             DAG.getUNDEF(InterimIVT), SplatZero);
-    Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
-                         DAG.getCondCode(ISD::SETNE), Mask, VL);
+    Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
+                         {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
+                          DAG.getUNDEF(DstVT), Mask, VL});
   } else {
     MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                       DstVT.getVectorElementCount());
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -147,13 +147,14 @@
 def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;

 def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
-                            SDTypeProfile<1, 5, [SDTCVecEltisVT<0, i1>,
+                            SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
                                                  SDTCisVec<1>,
                                                  SDTCisSameNumEltsAs<0, 1>,
                                                  SDTCisSameAs<1, 2>,
                                                  SDTCisVT<3, OtherVT>,
                                                  SDTCisSameAs<0, 4>,
-                                                 SDTCisVT<5, XLenVT>]>>;
+                                                 SDTCisSameAs<0, 5>,
+                                                 SDTCisVT<6, XLenVT>]>>;

 def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
                                   SDTypeProfile<1, 5, [SDTCisVec<0>,
@@ -490,10 +491,11 @@
                                  CondCode cc> {
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
                                       vti.RegClass:$rs2, cc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
             (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
-                         (vti.Mask (IMPLICIT_DEF)),
+                         VR:$merge,
                          vti.RegClass:$rs1,
                          vti.RegClass:$rs2,
                          (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
@@ -505,10 +507,11 @@
     : VPatIntegerSetCCVL_VV<vti, instruction_name, cc> {
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs2),
                                       vti.RegClass:$rs1, invcc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
             (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
-                         (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                         VR:$merge, vti.RegClass:$rs1,
                          vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl,
                          vti.Log2SEW)>;
 }
@@ -517,15 +520,17 @@
   defvar instruction_masked = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK");
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
                                       (SplatPat (XLenVT GPR:$rs2)), cc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
-            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+            (instruction_masked VR:$merge, vti.RegClass:$rs1,
                                 GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)),
                                       (vti.Vector vti.RegClass:$rs1), invcc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
-            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+            (instruction_masked VR:$merge, vti.RegClass:$rs1,
                                 GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
 }
@@ -534,18 +539,20 @@
   defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
                                       (SplatPat_simm5 simm5:$rs2), cc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
-            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+            (instruction_masked VR:$merge, vti.RegClass:$rs1,
                                 XLenVT:$rs2, (vti.Mask V0), GPR:$vl,
                                 vti.Log2SEW)>;

   // FIXME: Can do some canonicalization to remove these patterns.
   def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2),
                                       (vti.Vector vti.RegClass:$rs1), invcc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
-            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+            (instruction_masked VR:$merge, vti.RegClass:$rs1,
                                 simm5:$rs2, (vti.Mask V0), GPR:$vl,
                                 vti.Log2SEW)>;
 }
@@ -557,18 +564,20 @@
   defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
                                       (splatpat_kind simm5:$rs2), cc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
-            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+            (instruction_masked VR:$merge, vti.RegClass:$rs1,
                                 (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
                                 vti.Log2SEW)>;

   // FIXME: Can do some canonicalization to remove these patterns.
   def : Pat<(vti.Mask (riscv_setcc_vl (splatpat_kind simm5:$rs2),
                                       (vti.Vector vti.RegClass:$rs1), invcc,
+                                      VR:$merge,
                                       (vti.Mask V0),
                                       VLOpFrag)),
-            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+            (instruction_masked VR:$merge, vti.RegClass:$rs1,
                                 (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
                                 vti.Log2SEW)>;
 }
@@ -580,28 +589,31 @@
   def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
                                        fvti.RegClass:$rs2,
                                        cc,
+                                       VR:$merge,
                                        (fvti.Mask V0),
                                        VLOpFrag)),
             (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
-                (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                VR:$merge, fvti.RegClass:$rs1,
                 fvti.RegClass:$rs2, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
   def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
                                        (SplatFPOp fvti.ScalarRegClass:$rs2),
                                        cc,
+                                       VR:$merge,
                                        (fvti.Mask V0),
                                        VLOpFrag)),
             (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
-                (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                VR:$merge, fvti.RegClass:$rs1,
                 fvti.ScalarRegClass:$rs2, (fvti.Mask V0), GPR:$vl,
                 fvti.Log2SEW)>;
   def : Pat<(fvti.Mask (riscv_setcc_vl (SplatFPOp fvti.ScalarRegClass:$rs2),
                                        (fvti.Vector fvti.RegClass:$rs1),
                                        cc,
+                                       VR:$merge,
                                        (fvti.Mask V0),
                                        VLOpFrag)),
             (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
-                (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                VR:$merge, fvti.RegClass:$rs1,
                 fvti.ScalarRegClass:$rs2, (fvti.Mask V0), GPR:$vl,
                 fvti.Log2SEW)>;
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
@@ -14,15 +14,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI0_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
   ret <vscale x 1 x half> %a
@@ -39,15 +39,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI1_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI1_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
   ret <vscale x 2 x half> %a
@@ -64,15 +64,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI2_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI2_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
   ret <vscale x 4 x half> %a
@@ -89,12 +89,11 @@
 ; CHECK-NEXT:    vmflt.vf v10, v12, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI3_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI3_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v11, v10
 ; CHECK-NEXT:    vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    vfadd.vf v12, v12, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
@@ -115,12 +114,11 @@
 ; CHECK-NEXT:    vmflt.vf v12, v16, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI4_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v13, v12
 ; CHECK-NEXT:    vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vfadd.vf v16, v16, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
@@ -141,12 +139,11 @@
 ; CHECK-NEXT:    vmflt.vf v16, v24, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI5_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vfadd.vf v24, v24, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v16
@@ -167,15 +164,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI6_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI6_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
   ret <vscale x 1 x float> %a
@@ -192,15 +189,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI7_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI7_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
   ret <vscale x 2 x float> %a
@@ -217,12 +214,11 @@
 ; CHECK-NEXT:    vmflt.vf v10, v12, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI8_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI8_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v11, v10
 ; CHECK-NEXT:    vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    vfadd.vf v12, v12, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
@@ -243,12 +239,11 @@
 ; CHECK-NEXT:    vmflt.vf v12, v16, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v13, v12
 ; CHECK-NEXT:    vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vfadd.vf v16, v16, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
@@ -269,12 +264,11 @@
 ; CHECK-NEXT:    vmflt.vf v16, v24, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vfadd.vf v24, v24, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v16
@@ -295,15 +289,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v11, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
@@ -320,12 +314,11 @@
 ; CHECK-NEXT:    vmflt.vf v10, v12, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI12_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI12_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v11, v10
 ; CHECK-NEXT:    vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    vfadd.vf v12, v12, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
@@ -346,12 +339,11 @@
 ; CHECK-NEXT:    vmflt.vf v12, v16, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI13_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI13_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v13, v12
 ; CHECK-NEXT:    vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vfadd.vf v16, v16, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
@@ -372,12 +364,11 @@
 ; CHECK-NEXT:    vmflt.vf v16, v24, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI14_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vfadd.vf v24, v24, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
@@ -14,15 +14,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI0_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
   ret <vscale x 1 x half> %a
@@ -39,15 +39,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI1_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI1_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x)
   ret <vscale x 2 x half> %a
@@ -64,15 +64,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI2_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI2_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x)
   ret <vscale x 4 x half> %a
@@ -89,12 +89,11 @@
 ; CHECK-NEXT:    vmflt.vf v10, v12, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI3_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI3_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v11, v10
 ; CHECK-NEXT:    vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    vfsub.vf v12, v12, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
@@ -115,12 +114,11 @@
 ; CHECK-NEXT:    vmflt.vf v12, v16, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI4_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v13, v12
 ; CHECK-NEXT:    vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vfsub.vf v16, v16, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
@@ -141,12 +139,11 @@
 ; CHECK-NEXT:    vmflt.vf v16, v24, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI5_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vfsub.vf v24, v24, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v16
@@ -167,15 +164,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI6_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI6_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> %x)
   ret <vscale x 1 x float> %a
@@ -192,15 +189,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI7_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI7_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> %x)
   ret <vscale x 2 x float> %a
@@ -217,12 +214,11 @@
 ; CHECK-NEXT:    vmflt.vf v10, v12, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI8_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI8_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v11, v10
 ; CHECK-NEXT:    vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    vfsub.vf v12, v12, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
@@ -243,12 +239,11 @@
 ; CHECK-NEXT:    vmflt.vf v12, v16, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v13, v12
 ; CHECK-NEXT:    vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vfsub.vf v16, v16, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
@@ -269,12 +264,11 @@
 ; CHECK-NEXT:    vmflt.vf v16, v24, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vfsub.vf v24, v24, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v16
@@ -295,15 +289,15 @@
 ; CHECK-NEXT:    vmflt.vf v9, v9, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v10, v9
+; CHECK-NEXT:    vmflt.vv v10, v8, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v11, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
@@ -320,12 +314,11 @@
 ; CHECK-NEXT:    vmflt.vf v10, v12, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI12_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI12_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v11, v10
 ; CHECK-NEXT:    vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    vfsub.vf v12, v12, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
@@ -346,12 +339,11 @@
 ; CHECK-NEXT:    vmflt.vf v12, v16, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI13_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI13_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v13, v12
 ; CHECK-NEXT:    vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vfsub.vf v16, v16, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v12
@@ -372,12 +364,11 @@
 ; CHECK-NEXT:    vmflt.vf v16, v24, ft0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI14_1)(a0)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vfsub.vf v24, v24, ft0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -2030,23 +2030,23 @@
 ; CHECK-LABEL: ceil_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a0)
 ; CHECK-NEXT:    lui a1, %hi(.LCPI94_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI94_0)(a1)
-; CHECK-NEXT:    vfabs.v v8, v9
+; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v8, v8, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v10, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI94_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI94_1)(a1)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmflt.vv v9, v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vse16.v v10, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, <8 x half>* %x
   %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
@@ -2059,23 +2059,23 @@
 ; CHECK-LABEL: ceil_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a0)
 ; CHECK-NEXT:    lui a1, %hi(.LCPI95_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI95_0)(a1)
-; CHECK-NEXT:    vfabs.v v8, v9
+; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v8, v8, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v10, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI95_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI95_1)(a1)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmflt.vv v9, v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vse32.v v10, (a0)
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, <4 x float>* %x
   %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
@@ -2088,23 +2088,23 @@
 ; CHECK-LABEL: ceil_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vle64.v v9, (a0)
+; CHECK-NEXT:    vle64.v v10, (a0)
 ; CHECK-NEXT:    lui a1, %hi(.LCPI96_0)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI96_0)(a1)
-; CHECK-NEXT:    vfabs.v v8, v9
+; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v8, v8, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v10, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI96_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI96_1)(a1)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmflt.vv v9, v11, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vfadd.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT:    vse64.v v9, (a0)
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vse64.v v10, (a0)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
   %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
@@ -2117,23 +2117,23 @@
 ; CHECK-LABEL: floor_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a0)
 ; CHECK-NEXT:    lui a1, %hi(.LCPI97_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI97_0)(a1)
-; CHECK-NEXT:    vfabs.v v8, v9
+; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v8, v8, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v10, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI97_1)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI97_1)(a1)
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v9, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmflt.vv v9, v10, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vse16.v v10, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, <8 x half>* %x
   %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
@@ -2146,23 +2146,23 @@
 ; CHECK-LABEL: floor_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a0)
 ; CHECK-NEXT:    lui a1, %hi(.LCPI98_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI98_0)(a1)
-; CHECK-NEXT:    vfabs.v v8, v9
+; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v8, v8, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v10, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI98_1)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI98_1)(a1)
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v9, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmflt.vv v9, v10, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vse32.v v10, (a0)
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, <4 x float>* %x
   %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
@@ -2175,23 +2175,23 @@
 ; CHECK-LABEL: floor_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vle64.v v9, (a0)
+; CHECK-NEXT:    vle64.v v10, (a0)
 ; CHECK-NEXT:    lui a1, %hi(.LCPI99_0)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI99_0)(a1)
-; CHECK-NEXT:    vfabs.v v8, v9
+; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v8, v8, ft0
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v9, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v10, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI99_1)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI99_1)(a1)
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v9, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v10, v10, ft0, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v9, v0.t
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    vmflt.vv v9, v10, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vfsub.vf v11, v11, ft0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vfsgnj.vv v9, v10, v9, v0.t
-; CHECK-NEXT:    vse64.v v9, (a0)
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vse64.v v10, (a0)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
   %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
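
Note (illustration only, not part of the patch): after this change, RISCVISD::SETCC_VL takes six operands, (lhs, rhs, cc, merge, mask, vl), where the new merge operand supplies the result for masked-off lanes, mirroring the passthru operand of the _MASK pseudos that the isel patterns previously tied to IMPLICIT_DEF. The sketch below shows one plausible call-site shape under that assumption; buildSetccVL is a hypothetical helper, not code from this patch, though DAG.getNode, DAG.getUNDEF, and DAG.getCondCode are the real SelectionDAG APIs used throughout the diff above.

  // Build a VL-predicated compare. Passing DAG.getUNDEF(MaskVT) as the merge
  // operand leaves inactive lanes as don't-care, reproducing the old behavior;
  // passing a live mask value (as the FCEIL/FFLOOR lowering now does with
  // {..., Mask, Mask, VL}) instead preserves those lanes' prior contents.
  static SDValue buildSetccVL(SelectionDAG &DAG, const SDLoc &DL, MVT MaskVT,
                              SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              SDValue Mask, SDValue VL) {
    return DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                       {LHS, RHS, DAG.getCondCode(CC), DAG.getUNDEF(MaskVT),
                        Mask, VL});
  }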