diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -243,6 +243,7 @@
   VFCVT_RM_X_F_VL, // Has a rounding mode operand.
   SINT_TO_FP_VL,
   UINT_TO_FP_VL,
+  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
   FP_ROUND_VL,
   FP_EXTEND_VL,
@@ -701,6 +702,7 @@
   SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -675,9 +675,7 @@
       // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
       // type that can represent the value exactly.
       if (VT.getVectorElementType() != MVT::i64) {
-        MVT FloatEltVT =
-            VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
-        EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
         if (isTypeLegal(FloatVT)) {
           setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                              Custom);
@@ -910,10 +908,7 @@
         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
         // type that can represent the value exactly.
         if (VT.getVectorElementType() != MVT::i64) {
-          MVT FloatEltVT =
-              VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
-          EVT FloatVT =
-              MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+          EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
           if (isTypeLegal(FloatVT))
             setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                                Custom);
@@ -3527,7 +3522,9 @@
 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
 // the exponent.
-static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+SDValue
+RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
+                                               SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
   unsigned EltSize = VT.getScalarSizeInBits();
   SDValue Src = Op.getOperand(0);
@@ -3538,6 +3535,15 @@
   MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

+  // The fraction of f32 cannot represent every i32 value exactly.
+  // Use RTZ so that rounding cannot change the exponent of FloatVal.
+  bool UseRTZ = false;
+  if (!isTypeLegal(FloatVT)) {
+    FloatEltVT = MVT::f32;
+    FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+    UseRTZ = true;
+  }
+
   // Legal types should have been checked in the RISCVTargetLowering
   // constructor.
   // TODO: Splitting may make sense in some cases.
@@ -3552,7 +3558,26 @@
   }

   // We have a legal FP type, convert to it.
-  SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+  SDValue FloatVal;
+  if (!UseRTZ) {
+    FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+  } else {
+    MVT ContainerVT = VT;
+    if (VT.isFixedLengthVector()) {
+      ContainerVT = getContainerForFixedLengthVector(VT);
+      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+    }
+
+    auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+    SDValue RTZRM =
+        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
+    MVT ContainerFloatVT =
+        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
+    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
+                           Src, Mask, RTZRM, VL);
+    if (VT.isFixedLengthVector())
+      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
+  }
   // Bitcast to integer and shift the exponent to the LSB.
   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
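Reviewer note (not part of the patch): a minimal scalar sketch of the exponent trick this lowering implements, with an illustrative helper name of our own choosing. For x in [2^k, 2^(k+1)) the biased f32 exponent field is 127 + k, so ctlz32(x) = 31 - k = 158 - exponent. The catch is rounding: with the default round-to-nearest, x = 2^25 - 1 rounds up to 2^25 and yields k = 25 instead of 24, which is exactly why the patch converts with RTZ when f32 cannot hold the value exactly.

    #include <stdint.h>
    #include <string.h>

    // ctlz via the f32 exponent field. Restricted here to x <= 1 << 24,
    // where the u32 -> f32 conversion is exact even without RTZ.
    static unsigned ctlz32_via_f32(uint32_t x) { // assumes x != 0
      float f = (float)x;
      uint32_t bits;
      memcpy(&bits, &f, sizeof(bits));   // bitcast, as in the lowering
      return 158 - (bits >> 23);         // 158 = 127 (bias) + 31
    }

The same arithmetic explains the magic constants in the tests below: 158 = 127 + 31 for i32, 142 = 127 + 15 for i16 (widening convert), and 134 = 127 + 7 for i8 (zero-extend to i32 first).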
@@ -11483,6 +11508,18 @@
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M2_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M4_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M8_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF2_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF4_MASK);
   case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                      RISCV::PseudoVFCVT_F_X_V_M1_MASK);
@@ -13079,6 +13116,7 @@
   NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
   NODE_NAME_CASE(SINT_TO_FP_VL)
   NODE_NAME_CASE(UINT_TO_FP_VL)
+  NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
   NODE_NAME_CASE(FP_EXTEND_VL)
   NODE_NAME_CASE(FP_ROUND_VL)
   NODE_NAME_CASE(VWMUL_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3406,6 +3406,17 @@
   }
 }

+multiclass VPseudoVCVTF_RM_V {
+  foreach m = MxListF in {
+    defvar mx = m.MX;
+    defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx);
+    defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx);
+
+    defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
+              Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>;
+  }
+}
+
 multiclass VPseudoConversionW_V {
   defvar constraint = "@earlyclobber $rd";
   foreach m = MxListW in
@@ -5487,6 +5498,7 @@
 defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
 defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
 }
+defm PseudoVFCVT_RM_F_XU : VPseudoVCVTF_RM_V;
 } // mayRaiseFPException = true

 //===----------------------------------------------------------------------===//
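On the custom-inserter cases above: the new RM pseudos reuse emitVFCVT_RM_MASK, which wraps the plain conversion pseudo in a save/set/restore of the dynamic rounding mode. In scalar C terms the emitted fsrmi/fsrm pair behaves roughly like this sketch (function name is illustrative; fesetround's effect on the conversion assumes FENV_ACCESS):

    #include <fenv.h>

    float convert_u32_rtz(unsigned x) {
      int saved = fegetround();   // fsrmi a0, 1 also hands back the old frm
      fesetround(FE_TOWARDZERO);  // frm encoding 1 == RTZ
      float f = (float)x;         // stands in for vfcvt.f.xu.v
      fesetround(saved);          // fsrm a0
      return f;
    }

This matches the fsrmi a0, 1 ... fsrm a0 bracketing visible in the CHECK-F i32 test output further down.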
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -140,11 +140,17 @@
   SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<2, i1>,
   SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>
 ]>;
+def SDT_RISCVI2FPOp_RM_VL : SDTypeProfile<1, 4, [
+  SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>,
+  SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
+  SDTCisVT<4, XLenVT>
+]>;

 def riscv_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL>;
 def riscv_vfcvt_rtz_xu_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_XU_F_VL", SDT_RISCVFP2IOp_VL>;
 def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
+def riscv_vfcvt_rm_f_xu_vl : SDNode<"RISCVISD::VFCVT_RM_F_XU_VL", SDT_RISCVI2FPOp_RM_VL>;

 def SDT_RISCVVecCvtX2FOp_VL : SDTypeProfile<1, 4, [
   SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
@@ -776,6 +782,18 @@
   }
 }

+multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+  foreach fvti = AllFloatVectors in {
+    defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+    def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
+                                (ivti.Mask V0), (XLenVT timm:$frm),
+                                VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+                   (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
+                   (ivti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
+  }
+}
+
 multiclass VPatWConvertFP2IVL_V<SDNode vop, string instruction_name> {
   foreach fvtiToFWti = AllWidenableFloatVectors in {
     defvar fvti = fvtiToFWti.Vti;
@@ -1720,6 +1738,7 @@
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
   defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
   defm : VPatConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFCVT_F_XU_V">;
+  defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFCVT_RM_F_XU">;

   // 14.18. Widening Floating-Point/Integer Type-Convert Instructions
   defm : VPatWConvertFP2IVL_V;
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
 ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64

@@ -29,6 +31,21 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv1i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v9, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv1i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
@@ -73,6 +90,21 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv2i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v9, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv2i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
@@ -117,6 +149,21 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv4i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v10, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v10, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv4i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
@@ -161,6 +208,21 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv8i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v12, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v12, v12
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv8i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
@@ -205,6 +267,21 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv16i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v16, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v16, v16
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v10, v12, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vrsub.vx v8, v10, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv16i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
@@ -349,6 +426,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv1i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv1i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
@@ -432,6 +521,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv2i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv2i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
@@ -515,6 +616,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv4i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv4i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
@@ -598,6 +711,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv8i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-F-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v10, v10, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v10, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv8i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
@@ -681,6 +806,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv16i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-F-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v12, v12, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv16i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
@@ -841,6 +978,21 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv1i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v9, v9, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v9, v9, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv1i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
@@ -934,6 +1086,21 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv2i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v9, v9, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v9, v9, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv2i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
@@ -1027,6 +1194,21 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv4i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v10, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v10, v10, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v10, v10, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v10, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv4i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
@@ -1120,6 +1302,21 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_nxv8i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v12, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v12, v12, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v12, v12, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v12, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_nxv8i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
@@ -1141,77 +1338,107 @@
 declare <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32>, i1)

 define <vscale x 16 x i32> @ctlz_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: ctlz_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
+; RV32I-LABEL: ctlz_nxv16i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 2
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 4
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 8
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 16
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    lui a0, 349525
+; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    vand.vx v16, v16, a0
+; RV32I-NEXT:    vsub.vv v8, v8, v16
+; RV32I-NEXT:    lui a0, 209715
+; RV32I-NEXT:    addi a0, a0, 819
+; RV32I-NEXT:    vand.vx v16, v8, a0
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    vadd.vv v8, v16, v8
+; RV32I-NEXT:    vsrl.vi v16, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v16
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
+; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    lui a0, 4112
+; RV32I-NEXT:    addi a0, a0, 257
+; RV32I-NEXT:    vmul.vx v8, v8, a0
+; RV32I-NEXT:    vsrl.vi v8, v8, 24
+; RV32I-NEXT:    ret
 ;
-; RV64-LABEL: ctlz_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; RV64I-LABEL: ctlz_nxv16i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV64I-NEXT:    vsrl.vi v16, v8, 1
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 2
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 4
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 8
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 16
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vsrl.vi v16, v8, 1
+; RV64I-NEXT:    lui a0, 349525
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    vand.vx v16, v16, a0
+; RV64I-NEXT:    vsub.vv v8, v8, v16
+; RV64I-NEXT:    lui a0, 209715
+; RV64I-NEXT:    addiw a0, a0, 819
+; RV64I-NEXT:    vand.vx v16, v8, a0
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    vadd.vv v8, v16, v8
+; RV64I-NEXT:    vsrl.vi v16, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v16
+; RV64I-NEXT:    lui a0, 61681
+; RV64I-NEXT:    addiw a0, a0, -241
+; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    lui a0, 4112
+; RV64I-NEXT:    addiw a0, a0, 257
+; RV64I-NEXT:    vmul.vx v8, v8, a0
+; RV64I-NEXT:    vsrl.vi v8, v8, 24
+; RV64I-NEXT:    ret
+;
+; CHECK-F-LABEL: ctlz_nxv16i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v16, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v16, v16, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v16, v16, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v16, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_nxv16i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-D-NEXT:    vmset.m v0
+; CHECK-D-NEXT:    fsrmi a0, 1
+; CHECK-D-NEXT:    vfcvt.f.xu.v v16, v8, v0.t
+; CHECK-D-NEXT:    vsrl.vi v16, v16, 23
+; CHECK-D-NEXT:    li a1, 158
+; CHECK-D-NEXT:    vrsub.vx v16, v16, a1
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a1, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v16, a1, v0
+; CHECK-D-NEXT:    fsrm a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 false)
   ret <vscale x 16 x i32> %a
 }
@@ -1646,6 +1873,19 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv1i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v9, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
@@ -1687,6 +1927,19 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv2i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v9, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
@@ -1728,6 +1981,19 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv4i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v10, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
@@ -1769,6 +2035,19 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv8i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v12, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v12
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
@@ -1810,6 +2089,19 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv16i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v16, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v16
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-F-NEXT:    li a0, 134
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
@@ -1949,6 +2241,15 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv1i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
@@ -2028,6 +2329,15 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv2i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
@@ -2107,6 +2417,15 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv4i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v10, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
@@ -2186,6 +2505,15 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv8i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v12, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
@@ -2265,6 +2593,15 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv16i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v16, 23
+; CHECK-F-NEXT:    li a0, 142
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv16i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
@@ -2420,6 +2757,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv1i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
@@ -2509,6 +2858,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv2i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
@@ -2598,6 +2959,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv4i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
@@ -2687,6 +3060,18 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv8i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
@@ -2704,77 +3089,101 @@
 }

 define <vscale x 16 x i32> @ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: ctlz_zero_undef_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
+; RV32I-LABEL: ctlz_zero_undef_nxv16i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 2
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 4
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 8
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 16
+; RV32I-NEXT:    vor.vv v8, v8, v16
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    lui a0, 349525
+; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    vand.vx v16, v16, a0
+; RV32I-NEXT:    vsub.vv v8, v8, v16
+; RV32I-NEXT:    lui a0, 209715
+; RV32I-NEXT:    addi a0, a0, 819
+; RV32I-NEXT:    vand.vx v16, v8, a0
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    vadd.vv v8, v16, v8
+; RV32I-NEXT:    vsrl.vi v16, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v16
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
+; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    lui a0, 4112
+; RV32I-NEXT:    addi a0, a0, 257
+; RV32I-NEXT:    vmul.vx v8, v8, a0
+; RV32I-NEXT:    vsrl.vi v8, v8, 24
+; RV32I-NEXT:    ret
 ;
-; RV64-LABEL: ctlz_zero_undef_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; RV64I-LABEL: ctlz_zero_undef_nxv16i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV64I-NEXT:    vsrl.vi v16, v8, 1
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 2
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 4
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 8
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 16
+; RV64I-NEXT:    vor.vv v8, v8, v16
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vsrl.vi v16, v8, 1
+; RV64I-NEXT:    lui a0, 349525
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    vand.vx v16, v16, a0
+; RV64I-NEXT:    vsub.vv v8, v8, v16
+; RV64I-NEXT:    lui a0, 209715
+; RV64I-NEXT:    addiw a0, a0, 819
+; RV64I-NEXT:    vand.vx v16, v8, a0
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    vadd.vv v8, v16, v8
+; RV64I-NEXT:    vsrl.vi v16, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v16
+; RV64I-NEXT:    lui a0, 61681
+; RV64I-NEXT:    addiw a0, a0, -241
+; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    lui a0, 4112
+; RV64I-NEXT:    addiw a0, a0, 257
+; RV64I-NEXT:    vmul.vx v8, v8, a0
+; RV64I-NEXT:    vsrl.vi v8, v8, 24
+; RV64I-NEXT:    ret
+;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv16i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 158
+; CHECK-F-NEXT:    vrsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv16i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-D-NEXT:    vmset.m v0
+; CHECK-D-NEXT:    fsrmi a0, 1
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-D-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-D-NEXT:    li a1, 158
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a1
+; CHECK-D-NEXT:    fsrm a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 true)
   ret <vscale x 16 x i32> %a
 }
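Reviewer note on the cttz half of the patch (a sketch, not the patch's code; the helper name is our own): isolating the lowest set bit with x & -x yields an exact power of two, so the f32 conversion is exact and the unbiased exponent is the trailing-zero count, which is why the cttz CHECK lines below subtract 127 rather than rsub from 158/142/134.

    #include <stdint.h>
    #include <string.h>

    // cttz via the f32 exponent: x & -x == 2^cttz(x), representable exactly
    // in f32 for every nonzero u32, so no rounding concern arises here.
    static unsigned cttz32_via_f32(uint32_t x) { // assumes x != 0
      uint32_t lsb = x & -x;
      float f = (float)lsb;
      uint32_t bits;
      memcpy(&bits, &f, sizeof(bits));
      return (bits >> 23) - 127;   // biased exponent minus the f32 bias
    }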
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
 ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64

@@ -26,6 +28,24 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv1i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v10, v9
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv1i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
@@ -70,6 +90,24 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv2i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v10, v9
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv2i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
@@ -114,6 +152,24 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv4i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v10, v9
+; CHECK-F-NEXT:    vfcvt.f.xu.v v10, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv4i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
@@ -158,6 +214,24 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv8i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v12, v9
+; CHECK-F-NEXT:    vfcvt.f.xu.v v12, v12
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vsub.vx v8, v9, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv8i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
@@ -202,6 +276,24 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv16i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-F-NEXT:    vand.vv v10, v8, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v16, v10
+; CHECK-F-NEXT:    vfcvt.f.xu.v v16, v16
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v10, v12, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    vsub.vx v8, v10, a0
+; CHECK-F-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv16i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
@@ -333,6 +425,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv1i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v9, v9, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv1i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
@@ -408,6 +514,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv2i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v9, v9, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv2i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
@@ -483,6 +603,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv4i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-F-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v9, v9, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv4i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
@@ -558,6 +692,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv8i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-F-NEXT:    vand.vv v10, v8, v10
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v12, v10
+; CHECK-F-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v10, v10, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v10, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv8i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
@@ -633,6 +781,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv16i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-F-NEXT:    vand.vv v12, v8, v12
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v16, v12
+; CHECK-F-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v12, v12, a0
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a0, 16
+; CHECK-F-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv16i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
@@ -771,6 +933,23 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv1i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v9, v0.t
+; CHECK-F-NEXT:    vsrl.vi v9, v9, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v9, v9, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv1i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
@@ -852,6 +1031,23 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv2i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v9, v9, v0.t
+; CHECK-F-NEXT:    vsrl.vi v9, v9, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v9, v9, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v9, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv2i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
@@ -933,6 +1129,23 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv4i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-F-NEXT:    vand.vv v10, v8, v10
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v10, v10, v0.t
+; CHECK-F-NEXT:    vsrl.vi v10, v10, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v10, v10, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v10, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv4i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
@@ -1014,6 +1227,23 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_nxv8i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-F-NEXT:    vand.vv v12, v8, v12
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v12, v12, v0.t
+; CHECK-F-NEXT:    vsrl.vi v12, v12, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v12, v12, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v12, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_nxv8i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
@@ -1037,63 +1267,97 @@
 declare <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32>, i1)

 define <vscale x 16 x i32> @cttz_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: cttz_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a0
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
+; RV32I-LABEL: cttz_nxv16i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT:    vsub.vx v16, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    lui a0, 349525
+; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    vand.vx v16, v16, a0
+; RV32I-NEXT:    vsub.vv v8, v8, v16
+; RV32I-NEXT:    lui a0, 209715
+; RV32I-NEXT:    addi a0, a0, 819
+; RV32I-NEXT:    vand.vx v16, v8, a0
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    vadd.vv v8, v16, v8
+; RV32I-NEXT:    vsrl.vi v16, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v16
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
+; RV32I-NEXT:    vand.vx v8, v8, a0
+; RV32I-NEXT:    lui a0, 4112
+; RV32I-NEXT:    addi a0, a0, 257
+; RV32I-NEXT:    vmul.vx v8, v8, a0
+; RV32I-NEXT:    vsrl.vi v8, v8, 24
+; RV32I-NEXT:    ret
 ;
-; RV64-LABEL: cttz_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a0
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; RV64I-LABEL: cttz_nxv16i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; RV64I-NEXT:    vsub.vx v16, v8, a0
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 1
+; RV64I-NEXT:    lui a0, 349525
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    vand.vx v16, v16, a0
+; RV64I-NEXT:    vsub.vv v8, v8, v16
+; RV64I-NEXT:    lui a0, 209715
+; RV64I-NEXT:    addiw a0, a0, 819
+; RV64I-NEXT:    vand.vx v16, v8, a0
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    vadd.vv v8, v16, v8
+; RV64I-NEXT:    vsrl.vi v16, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v16
+; RV64I-NEXT:    lui a0, 61681
+; RV64I-NEXT:    addiw a0, a0, -241
+; RV64I-NEXT:    vand.vx v8, v8, a0
+; RV64I-NEXT:    lui a0, 4112
+; RV64I-NEXT:    addiw a0, a0, 257
+; RV64I-NEXT:    vmul.vx v8, v8, a0
+; RV64I-NEXT:    vsrl.vi v8, v8, 24
+; RV64I-NEXT:    ret
+;
+; CHECK-F-LABEL: cttz_nxv16i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v16, v8, 0
+; CHECK-F-NEXT:    vand.vv v16, v8, v16
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v16, v16, v0.t
+; CHECK-F-NEXT:    vsrl.vi v16, v16, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v16, v16, a1
+; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-F-NEXT:    li a1, 32
+; CHECK-F-NEXT:    vmerge.vxm v8, v16, a1, v0
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_nxv16i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-D-NEXT:    vrsub.vi v16, v8, 0
+; CHECK-D-NEXT:    vand.vv v16, v8, v16
+; CHECK-D-NEXT:    vmset.m v0
+; CHECK-D-NEXT:    fsrmi a0, 1
+; CHECK-D-NEXT:    vfcvt.f.xu.v v16, v16, v0.t
+; CHECK-D-NEXT:    vsrl.vi v16, v16, 23
+; CHECK-D-NEXT:    li a1, 127
+; CHECK-D-NEXT:    vsub.vx v16, v16, a1
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a1, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v16, a1, v0
+; CHECK-D-NEXT:    fsrm a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false)
   ret <vscale x 16 x i32> %a
 }
@@ -1445,6 +1709,22 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv1i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v9, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv1i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
@@ -1486,6 +1766,22 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv2i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v9, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv2i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
@@ -1527,6 +1823,22 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv4i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v10, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv4i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
@@ -1568,6 +1880,22 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv8i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v12, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v12
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv8i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
@@ -1609,6 +1937,22 @@
 ; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
 ; CHECK-ZVE64X-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv16i8:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v10
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-F-NEXT:    vzext.vf4 v16, v8
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v16
+; CHECK-F-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-F-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-F-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv16i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
@@ -1735,6 +2079,17 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
@@ -1806,6 +2161,17 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv2i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv2i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
@@ -1877,6 +2243,17 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv4i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v10, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv4i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
@@ -1948,6 +2325,17 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv8i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v10
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v12, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv8i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
@@ -2019,6 +2407,17 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv16i16:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v12
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-F-NEXT:    vnsrl.wi v8, v16, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv16i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
@@ -2152,6 +2551,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv1i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv1i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
@@ -2229,6 +2642,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv2i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv2i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
@@ -2306,6 +2733,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv4i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v10
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv4i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
@@ -2383,6 +2824,20 @@
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
+; CHECK-F-LABEL: cttz_zero_undef_nxv8i32:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v12
+; CHECK-F-NEXT:    vmset.m v0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
+;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv8i32:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
@@ -2402,63 +2857,91 @@
 }

 define <vscale x 16 x i32> @cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a0
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
+; RV32I-LABEL: cttz_zero_undef_nxv16i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT:    vsub.vx v16, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    lui a0, 349525
+; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    vand.vx v16, v16, a0
+; RV32I-NEXT:    vsub.vv v8, v8, v16
v16 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vand.vx v16, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vmul.vx v8, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 24 +; RV32I-NEXT: ret ; -; RV64-LABEL: cttz_zero_undef_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; RV64I-LABEL: cttz_zero_undef_nxv16i32: +; RV64I: # %bb.0: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV64I-NEXT: vsub.vx v16, v8, a0 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: vand.vx v16, v16, a0 +; RV64I-NEXT: vsub.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: vand.vx v16, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vadd.vv v8, v16, v8 +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: vmul.vx v8, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 24 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: cttz_zero_undef_nxv16i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-F-NEXT: vrsub.vi v16, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v16 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: cttz_zero_undef_nxv16i32: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-D-NEXT: vrsub.vi v16, v8, 0 +; CHECK-D-NEXT: vand.vv v8, v8, v16 +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v8, v8, 23 +; CHECK-D-NEXT: li a1, 127 +; CHECK-D-NEXT: vsub.vx v8, v8, a1 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.cttz.nxv16i32( %va, i1 true) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x 
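[Note for reviewers] The CHECK-F cttz output above all follows one shape: vrsub.vi/vand.vv isolates the lowest set bit (x & -x, always a power of two and therefore exactly representable in f32), the unsigned convert moves that bit's index into the exponent field, and the trailing shift/subtract recovers it. A minimal scalar sketch of the same trick, assuming IEEE-754 binary32; the function name is illustrative, not part of the patch:

  #include <cstdint>
  #include <cstring>

  // cttz for nonzero x (undefined at x == 0, matching cttz_zero_undef).
  unsigned cttz_zero_undef(uint32_t x) {
    uint32_t lowbit = x & (0u - x);        // vrsub.vi + vand.vv: isolate 2^k
    float f = static_cast<float>(lowbit);  // exact: 2^k always fits binary32
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof bits);   // reinterpret, as vsrl.vi/vnsrl.wi do
    return (bits >> 23) - 127;             // biased exponent (127 + k) -> k
  }

The recurring li a0, 127 / vsub.vx is the binary32 exponent bias. The i8/i16 cases convert exactly through vzext.vf4 or vfwcvt.f.xu.v, while the e32 cases wrap the convert in fsrmi a0, 1 / fsrm a0 (round toward zero), since a general i32 is not exactly representable in f32.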
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
@@ -202,6 +204,34 @@
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
;
+; LMULMAX2-RV32F-LABEL: ctlz_v8i16:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v8
+; LMULMAX2-RV32F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV32F-NEXT: li a1, 142
+; LMULMAX2-RV32F-NEXT: vrsub.vx v9, v9, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 16
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
+;
+; LMULMAX2-RV64F-LABEL: ctlz_v8i16:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v8
+; LMULMAX2-RV64F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 142
+; LMULMAX2-RV64F-NEXT: vrsub.vx v9, v9, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 16
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
; LMULMAX2-RV32D-LABEL: ctlz_v8i16:
; LMULMAX2-RV32D: # %bb.0:
; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -328,81 +358,39 @@
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: ctlz_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 16
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 349525
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 209715
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 61681
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: lui a1, 4112
-; LMULMAX1-RV32-NEXT: addi a1, a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: ctlz_v4i32:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v9, v8, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV32F-NEXT: li a1, 158
+; LMULMAX2-RV32F-NEXT: vrsub.vx v9, v9, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 32
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
;
-; LMULMAX1-RV64-LABEL: ctlz_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 16
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 349525
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 209715
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 61681
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: lui a1, 4112
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: ctlz_v4i32:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v9, v8, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV64F-NEXT: li a1, 158
+; LMULMAX2-RV64F-NEXT: vrsub.vx v9, v9, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 32
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
;
; LMULMAX2-RV32D-LABEL: ctlz_v4i32:
; LMULMAX2-RV32D: # %bb.0:
@@ -1071,209 +1059,149 @@
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)

define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: ctlz_v8i32:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 16
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
+; LMULMAX2-RV32I-LABEL: ctlz_v8i32:
+; LMULMAX2-RV32I: # %bb.0:
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2
+; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 8
+; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 16
+; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: ret
;
-; LMULMAX2-RV64-LABEL: ctlz_v8i32:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 16
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 349525
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 209715
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 61681
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: lui a1, 4112
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-RV64I-LABEL: ctlz_v8i32:
+; LMULMAX2-RV64I: # %bb.0:
+; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 2
+; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 8
+; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 16
+; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV64I-NEXT: lui a1, 349525
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: lui a1, 209715
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: lui a1, 61681
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: lui a1, 4112
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: ctlz_v8i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 16
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a2, 349525
-; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a3, 209715
-; LMULMAX1-RV32-NEXT: addi a3, a3, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 61681
-; LMULMAX1-RV32-NEXT: addi a4, a4, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: lui a5, 4112
-; LMULMAX1-RV32-NEXT: addi a5, a5, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 16
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vnot.v v9, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: ctlz_v8i32:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32F-NEXT: li a1, 158
+; LMULMAX2-RV32F-NEXT: vrsub.vx v10, v10, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 32
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
;
-; LMULMAX1-RV64-LABEL: ctlz_v8i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 16
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a2, 349525
-; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a3, 209715
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 61681
-; LMULMAX1-RV64-NEXT: addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: lui a5, 4112
-; LMULMAX1-RV64-NEXT: addiw a5, a5, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 16
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: ctlz_v8i32:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 158
+; LMULMAX2-RV64F-NEXT: vrsub.vx v10, v10, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 32
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
+; LMULMAX2-RV32D-LABEL: ctlz_v8i32:
+; LMULMAX2-RV32D: # %bb.0:
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: vmset.m v0
+; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV32D-NEXT: fsrm a1
+; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32D-NEXT: li a1, 158
+; LMULMAX2-RV32D-NEXT: vrsub.vx v10, v10, a1
+; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32D-NEXT: li a1, 32
+; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: ret
+;
+; LMULMAX2-RV64D-LABEL: ctlz_v8i32:
+; LMULMAX2-RV64D: # %bb.0:
+; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: vmset.m v0
+; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV64D-NEXT: fsrm a1
+; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64D-NEXT: li a1, 158
+; LMULMAX2-RV64D-NEXT: vrsub.vx v10, v10, a1
+; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: li a1, 32
+; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: ret
;
; LMULMAX8-LABEL: ctlz_v8i32:
; LMULMAX8: # %bb.0:
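[Note for reviewers] The F-prefixed ctlz output converts the whole element rather than an isolated power of two, which is why the fsrmi a1, 1 / fsrm a1 pair temporarily selects round-toward-zero: truncation keeps the biased exponent at 127 + floor(log2 x) even when the 24-bit significand cannot hold x, whereas round-to-nearest could carry into the exponent for values just below a power of two. A scalar sketch under the assumption that the host's int-to-float conversion honors the dynamic rounding mode (requires fenv access on real compilers); the name ctlz32 is illustrative:

  #include <cfenv>
  #include <cstdint>
  #include <cstring>

  // ctlz for nonzero x; the tests' vmseq.vi/vmerge.vxm pair splices in 32
  // (or 16 for v8i16) to cover x == 0, which this sketch leaves undefined.
  unsigned ctlz32(uint32_t x) {
    const int old = std::fegetround();
    std::fesetround(FE_TOWARDZERO);     // fsrmi a1, 1 (RTZ)
    float f = static_cast<float>(x);    // biased exponent = 127 + floor(log2 x)
    std::fesetround(old);               // fsrm a1
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof bits);
    return 158u - (bits >> 23);         // 158 = 127 (bias) + 31
  }

The li a1, 158 / vrsub.vx in the v4i32/v8i32 checks is exactly this 158 - exponent step; the v8i16 tests use 142 (127 + 15) and need no rounding-mode change because the widening convert from i16 to f32 is exact.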
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
@@ -182,6 +184,38 @@
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
;
+; LMULMAX2-RV32F-LABEL: cttz_v8i16:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v9
+; LMULMAX2-RV32F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 16
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
+;
+; LMULMAX2-RV64F-LABEL: cttz_v8i16:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v9
+; LMULMAX2-RV64F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 16
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
; LMULMAX2-RV32D-LABEL: cttz_v8i16:
; LMULMAX2-RV32D: # %bb.0:
; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -300,67 +334,43 @@
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: cttz_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: li a1, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 349525
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 209715
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 61681
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: lui a1, 4112
-; LMULMAX1-RV32-NEXT: addi a1, a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: cttz_v4i32:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v9, v9, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 32
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
;
-; LMULMAX1-RV64-LABEL: cttz_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: li a1, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 349525
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 209715
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 61681
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: lui a1, 4112
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: cttz_v4i32:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v9, v9, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 32
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
;
; LMULMAX2-RV32D-LABEL: cttz_v4i32:
; LMULMAX2-RV32D: # %bb.0:
@@ -938,165 +948,143 @@
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)

define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v8i32:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: li a1, 1
-; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
+; LMULMAX2-RV32I-LABEL: cttz_v8i32:
+; LMULMAX2-RV32I: # %bb.0:
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: ret
;
-; LMULMAX2-RV64-LABEL: cttz_v8i32:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: li a1, 1
-; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 349525
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 209715
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 61681
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: lui a1, 4112
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-RV64I-LABEL: cttz_v8i32:
+; LMULMAX2-RV64I: # %bb.0:
+; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: li a1, 1
+; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV64I-NEXT: lui a1, 349525
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: lui a1, 209715
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: lui a1, 61681
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: lui a1, 4112
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: cttz_v8i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: li a2, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a3, 349525
-; LMULMAX1-RV32-NEXT: addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 209715
-; LMULMAX1-RV32-NEXT: addi a4, a4, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a5, 61681
-; LMULMAX1-RV32-NEXT: addi a5, a5, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: lui a6, 4112
-; LMULMAX1-RV32-NEXT: addi a6, a6, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT: vnot.v v9, v9
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: cttz_v8i32:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 32
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
;
-; LMULMAX1-RV64-LABEL: cttz_v8i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: li a2, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a3, 349525
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 209715
-; LMULMAX1-RV64-NEXT: addiw a4, a4, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a5, 61681
-; LMULMAX1-RV64-NEXT: addiw a5, a5, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: lui a6, 4112
-; LMULMAX1-RV64-NEXT: addiw a6, a6, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: cttz_v8i32:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 32
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
+; LMULMAX2-RV32D-LABEL: cttz_v8i32:
+; LMULMAX2-RV32D: # %bb.0:
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV32D-NEXT: vmset.m v0
+; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV32D-NEXT: fsrm a1
+; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32D-NEXT: li a1, 127
+; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32D-NEXT: li a1, 32
+; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: ret
+;
+; LMULMAX2-RV64D-LABEL: cttz_v8i32:
+; LMULMAX2-RV64D: # %bb.0:
+; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV64D-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV64D-NEXT: vmset.m v0
+; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV64D-NEXT: fsrm a1
+; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64D-NEXT: li a1, 127
+; LMULMAX2-RV64D-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: li a1, 32
+; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: ret
;
; LMULMAX8-LABEL: cttz_v8i32:
; LMULMAX8: # %bb.0: