diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -243,6 +243,7 @@
   VFCVT_RM_X_F_VL, // Has a rounding mode operand.
   SINT_TO_FP_VL,
   UINT_TO_FP_VL,
+  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
   FP_ROUND_VL,
   FP_EXTEND_VL,
@@ -701,6 +702,7 @@
   SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -672,16 +672,12 @@
       // Splice
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

-      // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
-      // type that can represent the value exactly.
-      if (VT.getVectorElementType() != MVT::i64) {
-        MVT FloatEltVT =
-            VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
-        EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
-        if (isTypeLegal(FloatVT)) {
-          setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
-                             Custom);
-        }
+      // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element values of VT
+      // are within the exponent range of f32.
+      EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+      if (isTypeLegal(FloatVT)) {
+        setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+                           Custom);
       }
     }
@@ -907,17 +903,12 @@

         setOperationAction(IntegerVPOps, VT, Custom);

-        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
-        // type that can represent the value exactly.
-        if (VT.getVectorElementType() != MVT::i64) {
-          MVT FloatEltVT =
-              VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
-          EVT FloatVT =
-              MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
-          if (isTypeLegal(FloatVT))
-            setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
-                               Custom);
-        }
+        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element values of
+        // VT are within the exponent range of f32.
+        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+        if (isTypeLegal(FloatVT))
+          setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+                             Custom);
       }

       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -3527,15 +3518,35 @@

 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
 // the exponent.
-static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+SDValue
+RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
+                                               SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
   unsigned EltSize = VT.getScalarSizeInBits();
   SDValue Src = Op.getOperand(0);
   SDLoc DL(Op);

-  // We need a FP type that can represent the value.
-  // TODO: Use f16 for i8 when possible?
-  MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
+  // We choose an FP type that can represent the value exactly, if possible.
+  // Otherwise, we use a round-toward-zero conversion so that rounding cannot
+  // change the exponent of the result.
+  MVT FloatEltVT = MVT::f32;
+  bool UseRTZ = false;
+  switch (EltSize) {
+  default:
+    // TODO: Use f16 for i8 when possible?
+    break;
+  case 32:
+    // TODO: When is using f32 beneficial?
+    if (isTypeLegal(MVT::getVectorVT(MVT::f64, VT.getVectorElementCount()))) {
+      FloatEltVT = MVT::f64;
+      break;
+    }
+    [[fallthrough]];
+  case 64:
+    // The fraction of f32 cannot represent every i32/i64 value.
+    // Use RTZ so that rounding cannot change the exponent of FloatVal.
+    UseRTZ = true;
+    break;
+  }
   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

   // Legal types should have been checked in the RISCVTargetLowering
@@ -3552,27 +3563,49 @@
   }

   // We have a legal FP type, convert to it.
-  SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+  SDValue FloatVal;
+  if (!UseRTZ) {
+    FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+  } else {
+    MVT ContainerVT = VT;
+    if (VT.isFixedLengthVector()) {
+      ContainerVT = getContainerForFixedLengthVector(VT);
+      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+    }
+
+    auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+    SDValue RTZRM =
+        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
+    MVT ContainerFloatVT =
+        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
+    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
+                           Src, Mask, RTZRM, VL);
+    if (VT.isFixedLengthVector())
+      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
+  }

   // Bitcast to integer and shift the exponent to the LSB.
   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
-  SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
-                              DAG.getConstant(ShiftAmt, DL, IntVT));
-  // Truncate back to original type to allow vnsrl.
-  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
+  SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
+                            DAG.getConstant(ShiftAmt, DL, IntVT));
+  // Restore to the original type. Truncating after the SRL allows a vnsrl to
+  // be generated.
+  if (IntVT.bitsLT(VT))
+    Exp = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Exp);
+  else if (IntVT.bitsGT(VT))
+    Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);

   // The exponent contains log2 of the value in biased form.
   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;

   // For trailing zeros, we just need to subtract the bias.
   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
-    return DAG.getNode(ISD::SUB, DL, VT, Trunc,
+    return DAG.getNode(ISD::SUB, DL, VT, Exp,
                        DAG.getConstant(ExponentBias, DL, VT));

   // For leading zeros, we need to remove the bias and convert from log2 to
   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
   unsigned Adjust = ExponentBias + (EltSize - 1);
-  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
+  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
 }

 // While RVV has alignment restrictions, we should always be able to load as a
@@ -11483,6 +11516,28 @@
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M2_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M4_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_M8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M8_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF2_MASK);
+  case RISCV::PseudoVFCVT_RM_F_XU_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF4_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_XU_W_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M1_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_XU_W_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M2_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_XU_W_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M4_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_XU_W_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF2_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_XU_W_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF4_MASK);
   case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                      RISCV::PseudoVFCVT_F_X_V_M1_MASK);
@@ -13079,6 +13134,7 @@
   NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
   NODE_NAME_CASE(SINT_TO_FP_VL)
   NODE_NAME_CASE(UINT_TO_FP_VL)
+  NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
   NODE_NAME_CASE(FP_EXTEND_VL)
   NODE_NAME_CASE(FP_ROUND_VL)
   NODE_NAME_CASE(VWMUL_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3406,6 +3406,17 @@
   }
 }

+multiclass VPseudoVCVTF_RM_V {
+  foreach m = MxListF in {
+    defvar mx = m.MX;
+    defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx);
+    defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx);
+
+    defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
+              Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>;
+  }
+}
+
 multiclass VPseudoConversionW_V {
   defvar constraint = "@earlyclobber $rd";
   foreach m = MxListW in
@@ -3472,6 +3483,18 @@
   }
 }

+multiclass VPseudoVNCVTF_RM_W {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxListFW in {
+    defvar mx = m.MX;
+    defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx);
+    defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx);
+
+    defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
+              Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>;
+  }
+}
+
 multiclass VPseudoVNCVTD_W {
   defvar constraint = "@earlyclobber $rd";
   foreach m = MxListFW in {
@@ -5487,6 +5510,7 @@
 defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
 defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
 }
+defm PseudoVFCVT_RM_F_XU : VPseudoVCVTF_RM_V;
 } // mayRaiseFPException = true
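
[Editor's note: the exponent trick that lowerCTLZ_CTTZ_ZERO_UNDEF implements is easier to follow in scalar form. The sketch below is illustrative and not part of the patch: the helper name clz32 is invented, and std::fesetround stands in for the fsrmi/fsrm pair (frm = 1, i.e. RTZ) that the generated code wraps around vfcvt.f.xu.v, as the tests further down show.]

// Scalar model of the vector lowering; illustration only. Assumes the host
// honors the dynamic FP rounding mode for integer-to-float conversions.
#include <cfenv>
#include <cstdint>
#include <cstring>

unsigned clz32(uint32_t X) { // X != 0, as CTLZ_ZERO_UNDEF requires
  const int OldRM = std::fegetround();
  std::fesetround(FE_TOWARDZERO); // RTZ: rounding may drop fraction bits but
                                  // can never bump the exponent upward.
  float F = static_cast<float>(X); // models vfcvt.f.xu.v with frm = RTZ
  std::fesetround(OldRM);
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast float -> i32
  unsigned Exp = Bits >> 23;            // shift exponent field to the LSB
  // Exp = floor(log2(X)) + 127, so clz = (127 + 31) - Exp = 158 - Exp,
  // matching the "li a1, 158; vrsub.vx" sequence in the tests below.
  return (127 + 31) - Exp;
}

[The trailing-zero path uses the same conversion but, as the comment in the lowering says, subtracts only the bias; the tests below exercise both paths across SEW/LMUL combinations.]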

//===----------------------------------------------------------------------===//
@@ -5520,6 +5544,7 @@
 defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W;
 }
 defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
+defm PseudoVFNCVT_RM_F_XU : VPseudoVNCVTF_RM_W;
 } // mayRaiseFPException = true
 } // Predicates = [HasVInstructionsAnyF]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -140,11 +140,17 @@
   SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>,
   SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>
 ]>;
+def SDT_RISCVI2FPOp_RM_VL : SDTypeProfile<1, 4, [
+  SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>,
+  SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
+  SDTCisVT<4, XLenVT>
+]>;

 def riscv_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL>;
 def riscv_vfcvt_rtz_xu_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_XU_F_VL", SDT_RISCVFP2IOp_VL>;

 def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
+def riscv_vfcvt_rm_f_xu_vl : SDNode<"RISCVISD::VFCVT_RM_F_XU_VL", SDT_RISCVI2FPOp_RM_VL>;

 def SDT_RISCVVecCvtX2FOp_VL : SDTypeProfile<1, 4, [
   SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
@@ -776,6 +782,18 @@
   }
 }

+multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+  foreach fvti = AllFloatVectors in {
+    defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+    def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
+                                (ivti.Mask V0), (XLenVT timm:$frm),
+                                VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+                  (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
+                  (ivti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
+  }
+}
+
 multiclass VPatWConvertFP2IVL_V<SDNode vop, string instruction_name> {
   foreach fvtiToFWti = AllWidenableFloatVectors in {
     defvar fvti = fvtiToFWti.Vti;
@@ -828,6 +846,19 @@
   }
 }

+multiclass VPatNConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+  foreach fvtiToFWti = AllWidenableFloatVectors in {
+    defvar fvti = fvtiToFWti.Vti;
+    defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+    def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
+                                (iwti.Mask V0), (XLenVT timm:$frm),
+                                VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+                  (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1,
+                  (iwti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
+  }
+}
+
 multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
   foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in {
     defvar vti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # vti.SEW # "M1");
@@ -1720,6 +1751,7 @@
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
   defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
   defm : VPatConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFCVT_F_XU_V">;
+  defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFCVT_RM_F_XU_V">;

   // 14.18.
Widening Floating-Point/Integer Type-Convert Instructions defm : VPatWConvertFP2IVL_V; @@ -1742,6 +1774,8 @@ defm : VPatNConvertFP2IVL_V; defm : VPatNConvertI2FPVL_V; defm : VPatNConvertI2FPVL_V; + defm : + VPatNConvertI2FP_RM_VL_V; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64 @@ -29,6 +31,21 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv1i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vzext.vf4 v9, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v9 +; CHECK-F-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vrsub.vx v8, v9, a0 +; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv1i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -73,6 +90,21 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv2i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vzext.vf4 v9, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v9 +; CHECK-F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vrsub.vx v8, v9, a0 +; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv2i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma @@ -117,6 +149,21 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv4i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vzext.vf4 v10, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v10, v10 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-F-NEXT: vnsrl.wi v9, v10, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vrsub.vx v8, v9, a0 +; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv4i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma @@ -161,6 +208,21 @@ ; 
CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv8i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vzext.vf4 v12, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v12, v12 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v10, v12, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-F-NEXT: vnsrl.wi v9, v10, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vrsub.vx v8, v9, a0 +; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv8i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma @@ -205,6 +267,21 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv16i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-F-NEXT: vzext.vf4 v16, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v16, v16 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v12, v16, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v10, v12, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vrsub.vx v8, v10, a0 +; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv16i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma @@ -349,6 +426,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv1i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v9, v9, a0 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a0, 16 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv1i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma @@ -432,6 +521,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv2i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v9, v9, a0 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a0, 16 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv2i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -515,6 +616,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv4i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-F-NEXT: vnsrl.wi v9, v10, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v9, v9, a0 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a0, 16 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv4i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -598,6 +711,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv8i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-F-NEXT: vnsrl.wi v10, v12, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v10, v10, a0 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a0, 16 +; CHECK-F-NEXT: vmerge.vxm v8, v10, a0, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv8i16: ; CHECK-D: # 
%bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma @@ -681,6 +806,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv16i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-F-NEXT: vnsrl.wi v12, v16, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v12, v12, a0 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a0, 16 +; CHECK-F-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv16i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma @@ -841,6 +978,21 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv1i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v9, v9, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v9, v9, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv1i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -934,6 +1086,21 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv2i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v9, v9, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v9, v9, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv2i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma @@ -1027,6 +1194,21 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv4i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v10, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v10, v10, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v10, v10, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v10, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv4i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma @@ -1120,6 +1302,21 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_nxv8i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v12, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v12, v12, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v12, v12, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v12, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_nxv8i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma @@ -1141,481 +1338,647 @@ declare @llvm.ctlz.nxv8i32(, i1) define @ctlz_nxv16i32( %va) { -; RV32-LABEL: ctlz_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 
-; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_nxv16i32: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vand.vx v16, v16, a0 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vand.vx v16, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vmul.vx v8, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 24 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_nxv16i32: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vnot.v v8, v8 
+; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: vand.vx v16, v16, a0 +; RV64I-NEXT: vsub.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: vand.vx v16, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vadd.vv v8, v16, v8 +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: vmul.vx v8, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 24 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_nxv16i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v16, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v16, v16, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v16, v16, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v16, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_nxv16i32: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfcvt.f.xu.v v16, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v16, v16, 23 +; CHECK-D-NEXT: li a1, 158 +; CHECK-D-NEXT: vrsub.vx v16, v16, a1 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: li a1, 32 +; CHECK-D-NEXT: vmerge.vxm v8, v16, a1, v0 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv16i32( %va, i1 false) ret %a } declare @llvm.ctlz.nxv16i32(, i1) define @ctlz_nxv1i64( %va) { -; RV32-LABEL: ctlz_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsrl.vi v11, v8, 1 -; RV32-NEXT: vand.vv v9, v11, v9 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsrl.vi v11, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v11 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_nxv1i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: 
.cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v9, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vsrl.vi v11, v8, 1 +; RV32I-NEXT: vand.vv v9, v11, v9 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: vand.vv v9, v8, v10 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v9, v8 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vsrl.vi v11, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v11 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI18_0) -; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI18_1) -; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI18_2) -; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI18_3) -; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_nxv1i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: vsrl.vx v9, v8, a0 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI18_0) +; RV64I-NEXT: 
ld a0, %lo(.LCPI18_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI18_1) +; RV64I-NEXT: ld a1, %lo(.LCPI18_1)(a1) +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: vand.vx v9, v9, a0 +; RV64I-NEXT: vsub.vv v8, v8, v9 +; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v9, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI18_2) +; RV64I-NEXT: ld a0, %lo(.LCPI18_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI18_3) +; RV64I-NEXT: ld a1, %lo(.LCPI18_3)(a1) +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_nxv1i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v9, v9, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-F-NEXT: vzext.vf2 v10, v9 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v9, v10, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_nxv1i64: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v9, v9, 23 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-D-NEXT: vzext.vf2 v10, v9 +; CHECK-D-NEXT: li a1, 190 +; CHECK-D-NEXT: vrsub.vx v9, v10, a1 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: li a1, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v9, a1, v0 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv1i64( %va, i1 false) ret %a } declare @llvm.ctlz.nxv1i64(, i1) define @ctlz_nxv2i64( %va) { -; RV32-LABEL: ctlz_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v10, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v14, v8, 1 -; RV32-NEXT: vand.vv v10, v14, v10 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v14, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v14 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vmul.vv v8, 
v8, v12 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_nxv2i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v10, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vsrl.vi v14, v8, 1 +; RV32I-NEXT: vand.vv v10, v14, v10 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: vand.vv v10, v8, v12 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vsrl.vi v14, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v14 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vmul.vv v8, v8, v12 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI19_1) -; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI19_2) -; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI19_3) -; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_nxv2i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 8 +; RV64I-NEXT: 
vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: vsrl.vx v10, v8, a0 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI19_0) +; RV64I-NEXT: ld a0, %lo(.LCPI19_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI19_1) +; RV64I-NEXT: ld a1, %lo(.LCPI19_1)(a1) +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: vand.vx v10, v10, a0 +; RV64I-NEXT: vsub.vv v8, v8, v10 +; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v10, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI19_2) +; RV64I-NEXT: ld a0, %lo(.LCPI19_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI19_3) +; RV64I-NEXT: ld a1, %lo(.LCPI19_3)(a1) +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_nxv2i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v10, v10, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-F-NEXT: vzext.vf2 v12, v10 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v10, v12, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vmerge.vxm v8, v10, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_nxv2i64: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfncvt.f.xu.w v10, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v10, v10, 23 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-D-NEXT: vzext.vf2 v12, v10 +; CHECK-D-NEXT: li a1, 190 +; CHECK-D-NEXT: vrsub.vx v10, v12, a1 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: li a1, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v10, a1, v0 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv2i64( %va, i1 false) ret %a } declare @llvm.ctlz.nxv2i64(, i1) define @ctlz_nxv4i64( %va) { -; RV32-LABEL: ctlz_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v12, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 1 -; RV32-NEXT: vand.vv v12, v20, v12 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: vsrl.vi v8, 
v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v20 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: ctlz_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI20_0) -; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI20_1) -; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI20_2) -; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI20_3) -; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %a = call @llvm.ctlz.nxv4i64( %va, i1 false) - ret %a -} -declare @llvm.ctlz.nxv4i64(, i1) - -define @ctlz_nxv8i64( %va) { -; RV32-LABEL: ctlz_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v16, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; 
RV32I-LABEL: ctlz_nxv4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vsrl.vi v12, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v12, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 1 +; RV32I-NEXT: vand.vv v12, v20, v12 +; RV32I-NEXT: vsub.vv v8, v8, v12 +; RV32I-NEXT: vand.vv v12, v8, v16 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v20 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vmul.vv v8, v8, v16 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI21_0) -; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI21_1) -; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI21_2) -; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI21_3) -; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_nxv4i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vsrl.vi v12, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vsrl.vi v12, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vsrl.vi v12, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vsrl.vi v12, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: li a0, 32 +; 
RV64I-NEXT: vsrl.vx v12, v8, a0 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI20_0) +; RV64I-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI20_1) +; RV64I-NEXT: ld a1, %lo(.LCPI20_1)(a1) +; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: vand.vx v12, v12, a0 +; RV64I-NEXT: vsub.vv v8, v8, v12 +; RV64I-NEXT: vand.vx v12, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v12, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI20_2) +; RV64I-NEXT: ld a0, %lo(.LCPI20_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI20_3) +; RV64I-NEXT: ld a1, %lo(.LCPI20_3)(a1) +; RV64I-NEXT: vsrl.vi v12, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v12 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_nxv4i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v12, v12, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-F-NEXT: vzext.vf2 v16, v12 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v12, v16, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vmerge.vxm v8, v12, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_nxv4i64: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfncvt.f.xu.w v12, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v12, v12, 23 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-D-NEXT: vzext.vf2 v16, v12 +; CHECK-D-NEXT: li a1, 190 +; CHECK-D-NEXT: vrsub.vx v12, v16, a1 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: li a1, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v12, a1, v0 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret + %a = call @llvm.ctlz.nxv4i64( %va, i1 false) + ret %a +} +declare @llvm.ctlz.nxv4i64(, i1) + +define @ctlz_nxv8i64( %va) { +; RV32I-LABEL: ctlz_nxv8i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v16, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v24, (a0), zero +; RV32I-NEXT: vsrl.vi v0, v8, 1 +; RV32I-NEXT: vand.vv v16, v0, v16 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: vand.vv v16, v8, v24 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v24 +; RV32I-NEXT: vadd.vv 
v8, v16, v8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vlse64.v v24, (a0), zero +; RV32I-NEXT: vsrl.vi v0, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v0 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vmul.vv v8, v8, v24 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ctlz_nxv8i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: vsrl.vx v16, v8, a0 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI21_0) +; RV64I-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI21_1) +; RV64I-NEXT: ld a1, %lo(.LCPI21_1)(a1) +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: vand.vx v16, v16, a0 +; RV64I-NEXT: vsub.vv v8, v8, v16 +; RV64I-NEXT: vand.vx v16, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v16, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI21_2) +; RV64I-NEXT: ld a0, %lo(.LCPI21_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI21_3) +; RV64I-NEXT: ld a1, %lo(.LCPI21_3)(a1) +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_nxv8i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v16, v16, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-F-NEXT: vzext.vf2 v24, v16 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v16, v24, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vmerge.vxm v8, v16, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_nxv8i64: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfncvt.f.xu.w v16, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v16, v16, 23 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-D-NEXT: vzext.vf2 v24, v16 +; CHECK-D-NEXT: li a1, 190 +; CHECK-D-NEXT: vrsub.vx v16, v24, a1 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: li a1, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v16, a1, v0 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv8i64( %va, i1 false) ret %a } @@ -1646,6 +2009,19 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv1i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vzext.vf4 v9, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v9 +; CHECK-F-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -1687,6 +2063,19 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, 
v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv2i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vzext.vf4 v9, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v9 +; CHECK-F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma @@ -1728,6 +2117,19 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv4i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vzext.vf4 v10, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v10 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-F-NEXT: vnsrl.wi v10, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma @@ -1769,6 +2171,19 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv8i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vzext.vf4 v12, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v12 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v12, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma @@ -1810,6 +2225,19 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv16i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-F-NEXT: vzext.vf4 v16, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v16 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v16, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-F-NEXT: li a0, 134 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma @@ -1949,6 +2377,15 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv1i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma @@ -2028,6 +2465,15 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv2i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -2107,6 +2553,15 @@ ; RV64I-NEXT: vsrl.vi 
v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv4i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -2186,6 +2641,15 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv8i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma @@ -2265,6 +2729,15 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv16i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-F-NEXT: li a0, 142 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv16i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma @@ -2420,6 +2893,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv1i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -2509,6 +2994,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv2i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma @@ -2598,6 +3095,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv4i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma @@ -2687,6 +3196,18 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: ctlz_zero_undef_nxv8i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, 
zero, e32, m4, ta, ma @@ -2704,477 +3225,613 @@ } define @ctlz_zero_undef_nxv16i32( %va) { -; RV32-LABEL: ctlz_zero_undef_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_zero_undef_nxv16i32: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vand.vx v16, v16, a0 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vand.vx v16, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vmul.vx v8, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 24 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_zero_undef_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_zero_undef_nxv16i32: +; 
RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v16 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: vand.vx v16, v16, a0 +; RV64I-NEXT: vsub.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: vand.vx v16, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vadd.vv v8, v16, v8 +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: vmul.vx v8, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 24 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_zero_undef_nxv16i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_zero_undef_nxv16i32: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v8, v8, 23 +; CHECK-D-NEXT: li a1, 158 +; CHECK-D-NEXT: vrsub.vx v8, v8, a1 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv16i32( %va, i1 true) ret %a } define @ctlz_zero_undef_nxv1i64( %va) { -; RV32-LABEL: ctlz_zero_undef_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsrl.vi v11, v8, 1 -; RV32-NEXT: vand.vv v9, v11, v9 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsrl.vi v11, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v11 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vmul.vv 
v8, v8, v10 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_zero_undef_nxv1i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v9, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vsrl.vi v11, v8, 1 +; RV32I-NEXT: vand.vv v9, v11, v9 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: vand.vv v9, v8, v10 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v9, v8 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vsrl.vi v11, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v11 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_zero_undef_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI40_0) -; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_1) -; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI40_2) -; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_3) -; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_zero_undef_nxv1i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 8 +; RV64I-NEXT: vor.vv v8, 
v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: vsrl.vx v9, v8, a0 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI40_0) +; RV64I-NEXT: ld a0, %lo(.LCPI40_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI40_1) +; RV64I-NEXT: ld a1, %lo(.LCPI40_1)(a1) +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: vand.vx v9, v9, a0 +; RV64I-NEXT: vsub.vv v8, v8, v9 +; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v9, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI40_2) +; RV64I-NEXT: ld a0, %lo(.LCPI40_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI40_3) +; RV64I-NEXT: ld a1, %lo(.LCPI40_3)(a1) +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v9, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-F-NEXT: vzext.vf2 v9, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v9, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v8, v9, 23 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-D-NEXT: vzext.vf2 v9, v8 +; CHECK-D-NEXT: li a1, 190 +; CHECK-D-NEXT: vrsub.vx v8, v9, a1 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv1i64( %va, i1 true) ret %a } define @ctlz_zero_undef_nxv2i64( %va) { -; RV32-LABEL: ctlz_zero_undef_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v10, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v14, v8, 1 -; RV32-NEXT: vand.vv v10, v14, v10 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v14, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v14 -; RV32-NEXT: 
vand.vv v8, v8, v10 -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_zero_undef_nxv2i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v10, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vsrl.vi v14, v8, 1 +; RV32I-NEXT: vand.vv v10, v14, v10 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: vand.vv v10, v8, v12 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vsrl.vi v14, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v14 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vmul.vv v8, v8, v12 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_zero_undef_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI41_0) -; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_1) -; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI41_2) -; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_3) -; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_zero_undef_nxv2i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; 
RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: vsrl.vx v10, v8, a0 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI41_0) +; RV64I-NEXT: ld a0, %lo(.LCPI41_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI41_1) +; RV64I-NEXT: ld a1, %lo(.LCPI41_1)(a1) +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: vand.vx v10, v10, a0 +; RV64I-NEXT: vsub.vv v8, v8, v10 +; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v10, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI41_2) +; RV64I-NEXT: ld a0, %lo(.LCPI41_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI41_3) +; RV64I-NEXT: ld a1, %lo(.LCPI41_3)(a1) +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v10, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-F-NEXT: vzext.vf2 v10, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v10, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfncvt.f.xu.w v10, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v8, v10, 23 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-D-NEXT: vzext.vf2 v10, v8 +; CHECK-D-NEXT: li a1, 190 +; CHECK-D-NEXT: vrsub.vx v8, v10, a1 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv2i64( %va, i1 true) ret %a } define @ctlz_zero_undef_nxv4i64( %va) { -; RV32-LABEL: ctlz_zero_undef_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v12, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 1 -; RV32-NEXT: vand.vv v12, v20, v12 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: 
vlse64.v v16, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v20 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_zero_undef_nxv4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vsrl.vi v12, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v12, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v12 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 1 +; RV32I-NEXT: vand.vv v12, v20, v12 +; RV32I-NEXT: vsub.vv v8, v8, v12 +; RV32I-NEXT: vand.vv v12, v8, v16 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v20 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vmul.vv v8, v8, v16 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_zero_undef_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI42_0) -; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_1) -; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI42_2) -; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_3) -; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_zero_undef_nxv4i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v12 +; 
RV64I-NEXT: vsrl.vi v12, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vsrl.vi v12, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vsrl.vi v12, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vsrl.vi v12, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: vsrl.vx v12, v8, a0 +; RV64I-NEXT: vor.vv v8, v8, v12 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI42_0) +; RV64I-NEXT: ld a0, %lo(.LCPI42_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI42_1) +; RV64I-NEXT: ld a1, %lo(.LCPI42_1)(a1) +; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: vand.vx v12, v12, a0 +; RV64I-NEXT: vsub.vv v8, v8, v12 +; RV64I-NEXT: vand.vx v12, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v12, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI42_2) +; RV64I-NEXT: ld a0, %lo(.LCPI42_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI42_3) +; RV64I-NEXT: ld a1, %lo(.LCPI42_3)(a1) +; RV64I-NEXT: vsrl.vi v12, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v12 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v12, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-F-NEXT: vzext.vf2 v12, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v12, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-D-NEXT: vmset.m v0 +; CHECK-D-NEXT: fsrmi a0, 1 +; CHECK-D-NEXT: vfncvt.f.xu.w v12, v8, v0.t +; CHECK-D-NEXT: vsrl.vi v8, v12, 23 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-D-NEXT: vzext.vf2 v12, v8 +; CHECK-D-NEXT: li a1, 190 +; CHECK-D-NEXT: vrsub.vx v8, v12, a1 +; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: ret %a = call @llvm.ctlz.nxv4i64( %va, i1 true) ret %a } define @ctlz_zero_undef_nxv8i64( %va) { -; RV32-LABEL: ctlz_zero_undef_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v16, v8, a0 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv 
v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: ctlz_zero_undef_nxv8i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: vsrl.vx v16, v8, a0 +; RV32I-NEXT: vor.vv v8, v8, v16 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vlse64.v v24, (a0), zero +; RV32I-NEXT: vsrl.vi v0, v8, 1 +; RV32I-NEXT: vand.vv v16, v0, v16 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: vand.vv v16, v8, v24 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v24 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vlse64.v v24, (a0), zero +; RV32I-NEXT: vsrl.vi v0, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v0 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vmul.vv v8, v8, v24 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: ctlz_zero_undef_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI43_0) -; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_1) -; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI43_2) -; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_3) -; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: ctlz_zero_undef_nxv8i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetvli a0, 
zero, e64, m8, ta, ma
+; RV64I-NEXT: vsrl.vi v16, v8, 1
+; RV64I-NEXT: vor.vv v8, v8, v16
+; RV64I-NEXT: vsrl.vi v16, v8, 2
+; RV64I-NEXT: vor.vv v8, v8, v16
+; RV64I-NEXT: vsrl.vi v16, v8, 4
+; RV64I-NEXT: vor.vv v8, v8, v16
+; RV64I-NEXT: vsrl.vi v16, v8, 8
+; RV64I-NEXT: vor.vv v8, v8, v16
+; RV64I-NEXT: vsrl.vi v16, v8, 16
+; RV64I-NEXT: vor.vv v8, v8, v16
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: vsrl.vx v16, v8, a0
+; RV64I-NEXT: vor.vv v8, v8, v16
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI43_0)
+; RV64I-NEXT: ld a0, %lo(.LCPI43_0)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI43_1)
+; RV64I-NEXT: ld a1, %lo(.LCPI43_1)(a1)
+; RV64I-NEXT: vsrl.vi v16, v8, 1
+; RV64I-NEXT: vand.vx v16, v16, a0
+; RV64I-NEXT: vsub.vv v8, v8, v16
+; RV64I-NEXT: vand.vx v16, v8, a1
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a1
+; RV64I-NEXT: vadd.vv v8, v16, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI43_2)
+; RV64I-NEXT: ld a0, %lo(.LCPI43_2)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI43_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI43_3)(a1)
+; RV64I-NEXT: vsrl.vi v16, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v16
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vmset.m v0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v16, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v16, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v16, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-D-NEXT: vmset.m v0
+; CHECK-D-NEXT: fsrmi a0, 1
+; CHECK-D-NEXT: vfncvt.f.xu.w v16, v8, v0.t
+; CHECK-D-NEXT: vsrl.vi v8, v16, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-D-NEXT: vzext.vf2 v16, v8
+; CHECK-D-NEXT: li a1, 190
+; CHECK-D-NEXT: vrsub.vx v8, v16, a1
+; CHECK-D-NEXT: fsrm a0
+; CHECK-D-NEXT: ret
  %a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
  ret <vscale x 8 x i64> %a
}
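The CHECK-F and CHECK-D blocks above are all instances of the float-exponent trick this patch generalizes. The input is converted to f32, the biased exponent is shifted down by 23, and the result is subtracted from Bias + (EltSize - 1): 127 + 7 = 134 for i8, 127 + 15 = 142 for i16, 127 + 31 = 158 for i32, and 127 + 63 = 190 for i64. Those are exactly the vrsub.vx constants in the checks; the i64 variants narrow to f32 with vfncvt.f.xu.w and re-widen the shifted exponent with vzext.vf2. A minimal scalar sketch in C, assuming IEEE-754 binary32 (ctlz32_via_float is a hypothetical helper, not code from this patch):

#include <stdint.h>
#include <string.h>

/* Scalar model of the CTLZ-via-float lowering. Assumes x != 0, per the
   CTLZ_ZERO_UNDEF contract; the plain ctlz tests merge the zero case
   back in afterwards with vmseq.vi/vmerge.vxm. */
static unsigned ctlz32_via_float(uint32_t x) {
  float f = (float)x;             /* exact for x < 2^24; the vector code runs this
                                     under RTZ (fsrmi a0, 1) so wider values cannot
                                     round up into the next power of two */
  uint32_t bits;
  memcpy(&bits, &f, sizeof bits); /* the bitcast that the shift-by-23 operates on */
  unsigned exp = bits >> 23;      /* biased exponent: floor(log2(x)) + 127 */
  return (127 + 31) - exp;        /* the li a1, 158 / vrsub.vx pair for i32 */
}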
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64
@@ -26,6 +28,24 @@
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv1i8:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v9, v8, v9
+; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vzext.vf4 v10, v9
+; CHECK-F-NEXT: vfcvt.f.xu.v v9, v10
+; CHECK-F-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vsub.vx v8, v9, a0
+; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv1i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
@@ -70,6 +90,24 @@
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv2i8:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v9, v8, v9
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vzext.vf4 v10, v9
+; CHECK-F-NEXT: vfcvt.f.xu.v v9, v10
+; CHECK-F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vsub.vx v8, v9, a0
+; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv2i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
@@ -114,6 +152,24 @@
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv4i8:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v9, v8, v9
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vzext.vf4 v10, v9
+; CHECK-F-NEXT: vfcvt.f.xu.v v10, v10
+; CHECK-F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vsub.vx v8, v9, a0
+; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv4i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
@@ -158,6 +214,24 @@
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv8i8:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v9, v8, v9
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vzext.vf4 v12, v9
+; CHECK-F-NEXT: vfcvt.f.xu.v v12, v12
+; CHECK-F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v9, v10, 0
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vsub.vx v8, v9, a0
+; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv8i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, m1, ta, ma
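The cttz checks in this file differ from the ctlz ones only in their front end. The vrsub.vi/vand.vv pair computes x & -x, which isolates the lowest set bit as a power of two, so the float exponent directly encodes the trailing-zero count and only the bias of 127 has to be subtracted (the li a0, 127 / vsub.vx pair); x == 0 is patched afterwards by vmseq.vi plus a merge of the element width. A scalar sketch under the same assumptions as above (cttz32_via_float is again a hypothetical helper):

#include <stdint.h>
#include <string.h>

/* Scalar model of the CTTZ lowering being tested. Assumes x != 0; the tests
   splice in the element width for zero inputs. The isolated low bit is a
   power of two, so its f32 conversion is exact for any 32-bit input. */
static unsigned cttz32_via_float(uint32_t x) {
  uint32_t lsb = x & (0u - x);    /* vrsub.vi + vand.vv: keep only the lowest set bit */
  float f = (float)lsb;           /* exact: single-bit values convert without rounding */
  uint32_t bits;
  memcpy(&bits, &f, sizeof bits);
  return (bits >> 23) - 127;      /* biased exponent minus the f32 bias */
}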
@@ -202,6 +276,24 @@
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv16i8:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-F-NEXT: vrsub.vi v10, v8, 0
+; CHECK-F-NEXT: vand.vv v10, v8, v10
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-F-NEXT: vzext.vf4 v16, v10
+; CHECK-F-NEXT: vfcvt.f.xu.v v16, v16
+; CHECK-F-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v10, v12, 0
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vsub.vx v8, v10, a0
+; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv16i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -333,6 +425,20 @@
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv1i16:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v9, v8, v9
+; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
+; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vsub.vx v9, v9, a0
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: li a0, 16
+; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv1i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
@@ -408,6 +514,20 @@
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv2i16:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v9, v8, v9
+; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
+; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vsub.vx v9, v9, a0
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: li a0, 16
+; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv2i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -483,6 +603,20 @@
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv4i16:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v9, v8, v9
+; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
+; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vsub.vx v9, v9, a0
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: li a0, 16
+; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv4i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma
@@ -558,6 +692,20 @@
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv8i16:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-F-NEXT: vrsub.vi v10, v8, 0
+; CHECK-F-NEXT: vand.vv v10, v8, v10
+; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v10
+; CHECK-F-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vsub.vx v10, v10, a0
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: li a0, 16
+; CHECK-F-NEXT: vmerge.vxm v8, v10, a0, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv8i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma
@@ -633,6 +781,20 @@
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
+; CHECK-F-LABEL: cttz_nxv16i16:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-F-NEXT: vrsub.vi v12, v8, 0
+; CHECK-F-NEXT: vand.vv v12, v8, v12
+; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v12
+; CHECK-F-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vsub.vx v12, v12, a0
+; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: li a0, 16
+; CHECK-F-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_nxv16i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma
@@ -771,6 +933,23 @@
; RV64I-NEXT: vsrl.vi v8, v8, 24
; RV64I-NEXT: ret
;
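A note on the rounding-mode dance visible in the i32 and i64 blocks that follow (fsrmi a0, 1 selects round-toward-zero, and fsrm a0 restores the previous mode): f32 carries a 24-bit significand, so inputs with 25 or more significant bits convert inexactly, and the default round-to-nearest can carry into the exponent and overstate log2. For example, x = 2^25 - 1 has 7 leading zeros, and round-toward-zero keeps the conversion inside [2^24, 2^25), giving 158 - 151 = 7; round-to-nearest converts it up to exactly 2^25, whose exponent field reads 152 and yields the wrong 158 - 152 = 6. For cttz the converted value x & -x is a single bit and always exact, so the mode switch in those checks looks like a byproduct of reusing the same rounding-mode-carrying conversion node rather than a correctness requirement.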
+; CHECK-F-LABEL: cttz_nxv1i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v9, v8, v9 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v9, v0.t +; CHECK-F-NEXT: vsrl.vi v9, v9, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v9, v9, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_nxv1i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -852,6 +1031,23 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_nxv2i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v9, v8, v9 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v9, v0.t +; CHECK-F-NEXT: vsrl.vi v9, v9, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v9, v9, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_nxv2i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma @@ -933,6 +1129,23 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_nxv4i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vrsub.vi v10, v8, 0 +; CHECK-F-NEXT: vand.vv v10, v8, v10 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v10, v10, v0.t +; CHECK-F-NEXT: vsrl.vi v10, v10, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v10, v10, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v10, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_nxv4i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma @@ -1014,6 +1227,23 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_nxv8i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vrsub.vi v12, v8, 0 +; CHECK-F-NEXT: vand.vv v12, v8, v12 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v12, v12, v0.t +; CHECK-F-NEXT: vsrl.vi v12, v12, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v12, v12, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v12, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_nxv8i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma @@ -1037,387 +1267,421 @@ declare @llvm.cttz.nxv8i32(, i1) define @cttz_nxv16i32( %va) { -; RV32-LABEL: cttz_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a0 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, 
a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret +; RV32I-LABEL: cttz_nxv16i32: +; RV32I: # %bb.0: +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vsub.vx v16, v8, a0 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vand.vx v16, v16, a0 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vand.vx v16, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vand.vx v8, v8, a0 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vmul.vx v8, v8, a0 +; RV32I-NEXT: vsrl.vi v8, v8, 24 +; RV32I-NEXT: ret ; -; RV64-LABEL: cttz_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; RV64I-LABEL: cttz_nxv16i32: +; RV64I: # %bb.0: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV64I-NEXT: vsub.vx v16, v8, a0 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v16 +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: vand.vx v16, v16, a0 +; RV64I-NEXT: vsub.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: vand.vx v16, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vadd.vv v8, v16, v8 +; RV64I-NEXT: vsrl.vi v16, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: vmul.vx v8, v8, a0 +; RV64I-NEXT: vsrl.vi v8, v8, 24 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: cttz_nxv16i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-F-NEXT: vrsub.vi v16, v8, 0 +; CHECK-F-NEXT: vand.vv v16, v8, v16 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v16, v16, v0.t +; CHECK-F-NEXT: vsrl.vi v16, v16, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v16, v16, a1 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: li a1, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v16, a1, v0 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; +; CHECK-D-LABEL: cttz_nxv16i32: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-D-NEXT: vrsub.vi v16, v8, 
0
+; CHECK-D-NEXT: vand.vv v16, v8, v16
+; CHECK-D-NEXT: vmset.m v0
+; CHECK-D-NEXT: fsrmi a0, 1
+; CHECK-D-NEXT: vfcvt.f.xu.v v16, v16, v0.t
+; CHECK-D-NEXT: vsrl.vi v16, v16, 23
+; CHECK-D-NEXT: li a1, 127
+; CHECK-D-NEXT: vsub.vx v16, v16, a1
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a1, 32
+; CHECK-D-NEXT: vmerge.vxm v8, v16, a1, v0
+; CHECK-D-NEXT: fsrm a0
+; CHECK-D-NEXT: ret
  %a = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32>, i1)
define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
-; RV32-LABEL: cttz_nxv1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsrl.vi v11, v8, 1
-; RV32-NEXT: vand.vv v10, v11, v10
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: vand.vv v10, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsrl.vi v11, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v11
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vmul.vv v8, v8, v10
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32I-LABEL: cttz_nxv1i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32I-NEXT: vsub.vx v9, v8, a0
+; RV32I-NEXT: vnot.v v8, v8
+; RV32I-NEXT: addi a0, sp, 8
+; RV32I-NEXT: vlse64.v v10, (a0), zero
+; RV32I-NEXT: vand.vv v8, v8, v9
+; RV32I-NEXT: vlse64.v v9, (a0), zero
+; RV32I-NEXT: vsrl.vi v11, v8, 1
+; RV32I-NEXT: vand.vv v10, v11, v10
+; RV32I-NEXT: vsub.vv v8, v8, v10
+; RV32I-NEXT: vand.vv v10, v8, v9
+; RV32I-NEXT: vsrl.vi v8, v8, 2
+; RV32I-NEXT: vand.vv v8, v8, v9
+; RV32I-NEXT: vadd.vv v8, v10, v8
+; RV32I-NEXT: vlse64.v v9, (a0), zero
+; RV32I-NEXT: vlse64.v v10, (a0), zero
+; RV32I-NEXT: vsrl.vi v11, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v11
+; RV32I-NEXT: vand.vv v8, v8, v9
+; RV32I-NEXT: vmul.vv v8, v8, v10
+; RV32I-NEXT: li a0, 56
+; RV32I-NEXT: vsrl.vx v8, v8, a0
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
;
-; RV64-LABEL: cttz_nxv1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a0
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: lui a0, %hi(.LCPI18_0)
-; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI18_1)
-; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1)
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vand.vx v9, v8, a1
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: lui a0, %hi(.LCPI18_2)
-; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI18_3)
-; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1)
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; RV64I-LABEL: cttz_nxv1i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64I-NEXT: vsub.vx v9, v8, a0
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: vand.vv v8, v8, v9
+; RV64I-NEXT: lui a0, %hi(.LCPI18_0)
+; RV64I-NEXT: ld a0, %lo(.LCPI18_0)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI18_1)
+; RV64I-NEXT: ld a1, %lo(.LCPI18_1)(a1)
+; RV64I-NEXT: vsrl.vi v9, v8, 1
+; RV64I-NEXT: vand.vx v9, v9, a0
+; RV64I-NEXT: vsub.vv v8, v8, v9
+; RV64I-NEXT: vand.vx v9, v8, a1
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a1
+; RV64I-NEXT: vadd.vv v8, v9, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI18_2)
+; RV64I-NEXT: ld a0, %lo(.LCPI18_2)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI18_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI18_3)(a1)
+; RV64I-NEXT: vsrl.vi v9, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v9
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
  %a = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64>, i1)
define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
-; RV32-LABEL: cttz_nxv2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsrl.vi v14, v8, 1
-; RV32-NEXT: vand.vv v12, v14, v12
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: vand.vv v12, v8, v10
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vsrl.vi v14, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v14
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32I-LABEL: cttz_nxv2i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32I-NEXT: vsub.vx v10, v8, a0
+; RV32I-NEXT: vnot.v v8, v8
+; RV32I-NEXT: addi a0, sp, 8
+; RV32I-NEXT: vlse64.v v12, (a0), zero
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: vlse64.v v10, (a0), zero
+; RV32I-NEXT: vsrl.vi v14, v8, 1
+; RV32I-NEXT: vand.vv v12, v14, v12
+; RV32I-NEXT: vsub.vv v8, v8, v12
+; RV32I-NEXT: vand.vv v12, v8, v10
+; RV32I-NEXT: vsrl.vi v8, v8, 2
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: vadd.vv v8, v12, v8
+; RV32I-NEXT: vlse64.v v10, (a0), zero
+; RV32I-NEXT: vlse64.v v12, (a0), zero
+; RV32I-NEXT: vsrl.vi v14, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v14
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: vmul.vv v8, v8, v12
+; RV32I-NEXT: li a0, 56
+; RV32I-NEXT: vsrl.vx v8, v8, a0
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
;
-; RV64-LABEL: cttz_nxv2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a0
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: lui a0, %hi(.LCPI19_0)
-; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI19_1)
-; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1)
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: vand.vx v10, v8, a1
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: lui a0, %hi(.LCPI19_2)
-; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI19_3)
-; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1)
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; RV64I-LABEL: cttz_nxv2i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV64I-NEXT: vsub.vx v10, v8, a0
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: vand.vv v8, v8, v10
+; RV64I-NEXT: lui a0, %hi(.LCPI19_0)
+; RV64I-NEXT: ld a0, %lo(.LCPI19_0)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI19_1)
+; RV64I-NEXT: ld a1, %lo(.LCPI19_1)(a1)
+; RV64I-NEXT: vsrl.vi v10, v8, 1
+; RV64I-NEXT: vand.vx v10, v10, a0
+; RV64I-NEXT: vsub.vv v8, v8, v10
+; RV64I-NEXT: vand.vx v10, v8, a1
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a1
+; RV64I-NEXT: vadd.vv v8, v10, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI19_2)
+; RV64I-NEXT: ld a0, %lo(.LCPI19_2)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI19_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI19_3)(a1)
+; RV64I-NEXT: vsrl.vi v10, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v10
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
  %a = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64>, i1)
define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
-; RV32-LABEL: cttz_nxv4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0,
12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a0 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 1 -; RV32-NEXT: vand.vv v16, v20, v16 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v20 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: cttz_nxv4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32I-NEXT: vsub.vx v12, v8, a0 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 1 +; RV32I-NEXT: vand.vv v16, v20, v16 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: vand.vv v16, v8, v12 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v20 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vmul.vv v8, v8, v16 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: cttz_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: lui a0, %hi(.LCPI20_0) -; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI20_1) -; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI20_2) -; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI20_3) -; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: cttz_nxv4i64: +; RV64I: # 
%bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV64I-NEXT: vsub.vx v12, v8, a0
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: vand.vv v8, v8, v12
+; RV64I-NEXT: lui a0, %hi(.LCPI20_0)
+; RV64I-NEXT: ld a0, %lo(.LCPI20_0)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI20_1)
+; RV64I-NEXT: ld a1, %lo(.LCPI20_1)(a1)
+; RV64I-NEXT: vsrl.vi v12, v8, 1
+; RV64I-NEXT: vand.vx v12, v12, a0
+; RV64I-NEXT: vsub.vv v8, v8, v12
+; RV64I-NEXT: vand.vx v12, v8, a1
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a1
+; RV64I-NEXT: vadd.vv v8, v12, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI20_2)
+; RV64I-NEXT: ld a0, %lo(.LCPI20_2)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI20_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI20_3)(a1)
+; RV64I-NEXT: vsrl.vi v12, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v12
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
  %a = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64>, i1)
define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
-; RV32-LABEL: cttz_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsrl.vi v0, v8, 1
-; RV32-NEXT: vand.vv v24, v0, v24
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsrl.vi v0, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v0
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: cttz_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a0
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: lui a0, %hi(.LCPI21_0)
-; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI21_1)
-; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1)
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: vand.vx v16, v8, a1
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: lui a0, %hi(.LCPI21_2)
-; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI21_3)
-; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1)
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; RV32I-LABEL: cttz_nxv8i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32I-NEXT: vsub.vx v16, v8, a0
+; RV32I-NEXT: vnot.v v8, v8
+; RV32I-NEXT: addi a0, sp, 8
+; RV32I-NEXT: vlse64.v v24, (a0), zero
+; RV32I-NEXT: vand.vv v8, v8, v16
+; RV32I-NEXT: vlse64.v v16, (a0), zero
+; RV32I-NEXT: vsrl.vi v0, v8, 1
+; RV32I-NEXT: vand.vv v24, v0, v24
+; RV32I-NEXT: vsub.vv v8, v8, v24
+; RV32I-NEXT: vand.vv v24, v8, v16
+; RV32I-NEXT: vsrl.vi v8, v8, 2
+; RV32I-NEXT: vand.vv v8, v8, v16
+; RV32I-NEXT: vadd.vv v8, v24, v8
+; RV32I-NEXT: vlse64.v v16, (a0), zero
+; RV32I-NEXT: vlse64.v v24, (a0), zero
+; RV32I-NEXT: vsrl.vi v0, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v0
+; RV32I-NEXT: vand.vv v8, v8, v16
+; RV32I-NEXT: vmul.vv v8, v8, v24
+; RV32I-NEXT: li a0, 56
+; RV32I-NEXT: vsrl.vx v8, v8, a0
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: cttz_nxv8i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64I-NEXT: vsub.vx v16, v8, a0
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: vand.vv v8, v8, v16
+; RV64I-NEXT: lui a0, %hi(.LCPI21_0)
+; RV64I-NEXT: ld a0, %lo(.LCPI21_0)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI21_1)
+; RV64I-NEXT: ld a1, %lo(.LCPI21_1)(a1)
+; RV64I-NEXT: vsrl.vi v16, v8, 1
+; RV64I-NEXT: vand.vx v16, v16, a0
+; RV64I-NEXT: vsub.vv v8, v8, v16
+; RV64I-NEXT: vand.vx v16, v8, a1
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a1
+; RV64I-NEXT: vadd.vv v8, v16, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI21_2)
+; RV64I-NEXT: ld a0, %lo(.LCPI21_2)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI21_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI21_3)(a1)
+; RV64I-NEXT: vsrl.vi v16, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v16
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
  %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false)
  ret <vscale x 8 x i64> %a
}
@@ -1445,6 +1709,22 @@
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
+; CHECK-F-LABEL: cttz_zero_undef_nxv1i8:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v9
+; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vzext.vf4 v9, v8
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v9
+; CHECK-F-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-F-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_zero_undef_nxv1i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
@@ -1486,6 +1766,22 @@
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
+; CHECK-F-LABEL: cttz_zero_undef_nxv2i8:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v9
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vzext.vf4 v9, v8
+; CHECK-F-NEXT: vfcvt.f.xu.v v8,
v9 +; CHECK-F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv2i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e8, mf4, ta, ma @@ -1527,6 +1823,22 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv4i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vzext.vf4 v10, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v10 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-F-NEXT: vnsrl.wi v10, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv4i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e8, mf2, ta, ma @@ -1568,6 +1880,22 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv8i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vzext.vf4 v12, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v12 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v12, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv8i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e8, m1, ta, ma @@ -1609,6 +1937,22 @@ ; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 ; CHECK-ZVE64X-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv16i8: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-F-NEXT: vrsub.vi v10, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v10 +; CHECK-F-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-F-NEXT: vzext.vf4 v16, v8 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v16 +; CHECK-F-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-F-NEXT: vnsrl.wi v16, v8, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-F-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv16i8: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -1735,6 +2079,17 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv1i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv1i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma @@ -1806,6 +2161,17 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv2i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: 
vfwcvt.f.xu.v v9, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv2i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -1877,6 +2243,17 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv4i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv4i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -1948,6 +2325,17 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv8i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-F-NEXT: vrsub.vi v10, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v10 +; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv8i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma @@ -2019,6 +2407,17 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 8 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv16i16: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-F-NEXT: vrsub.vi v12, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v12 +; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-F-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv16i16: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma @@ -2152,6 +2551,20 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv1i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv1i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -2229,6 +2642,20 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv2i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a1, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a1 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: ret +; ; CHECK-D-LABEL: cttz_zero_undef_nxv2i32: ; CHECK-D: # %bb.0: ; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma @@ -2306,6 +2733,20 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; +; CHECK-F-LABEL: cttz_zero_undef_nxv4i32: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vrsub.vi v10, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v10 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li 
a1, 127
+; CHECK-F-NEXT: vsub.vx v8, v8, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_zero_undef_nxv4i32:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma
@@ -2383,6 +2824,20 @@
; RV64I-NEXT: vsrl.vi v8, v8, 24
; RV64I-NEXT: ret
;
+; CHECK-F-LABEL: cttz_zero_undef_nxv8i32:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vrsub.vi v12, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v12
+; CHECK-F-NEXT: vmset.m v0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v8, 23
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vsub.vx v8, v8, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
; CHECK-D-LABEL: cttz_zero_undef_nxv8i32:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma
@@ -2402,383 +2857,547 @@
}
define <vscale x 16 x i32> @cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
+; RV32I-LABEL: cttz_zero_undef_nxv16i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vsub.vx v16, v8, a0
+; RV32I-NEXT: vnot.v v8, v8
+; RV32I-NEXT: vand.vv v8, v8, v16
+; RV32I-NEXT: vsrl.vi v16, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vand.vx v16, v16, a0
+; RV32I-NEXT: vsub.vv v8, v8, v16
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vand.vx v16, v8, a0
+; RV32I-NEXT: vsrl.vi v8, v8, 2
+; RV32I-NEXT: vand.vx v8, v8, a0
+; RV32I-NEXT: vadd.vv v8, v16, v8
+; RV32I-NEXT: vsrl.vi v16, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v16
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vand.vx v8, v8, a0
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vmul.vx v8, v8, a0
+; RV32I-NEXT: vsrl.vi v8, v8, 24
+; RV32I-NEXT: ret
;
-; RV64-LABEL: cttz_zero_undef_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a0
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; RV64I-LABEL: cttz_zero_undef_nxv16i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV64I-NEXT: vsub.vx v16, v8, a0
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: vand.vv v8, v8, v16
+; RV64I-NEXT: vsrl.vi v16, v8, 1
+; RV64I-NEXT: lui a0, 349525
+; RV64I-NEXT: addiw a0, a0, 1365
+; RV64I-NEXT: vand.vx v16, v16, a0
+; RV64I-NEXT: vsub.vv v8, v8, v16
+; RV64I-NEXT: lui a0, 209715
+; RV64I-NEXT: addiw a0, a0, 819
+; RV64I-NEXT: vand.vx v16, v8, a0
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vadd.vv v8, v16, v8
+; RV64I-NEXT: vsrl.vi v16, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v16
+; RV64I-NEXT: lui a0, 61681
+; RV64I-NEXT: addiw a0, a0, -241
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: lui a0, 4112
+; RV64I-NEXT: addiw a0, a0, 257
+; RV64I-NEXT: vmul.vx v8, v8, a0
+; RV64I-NEXT: vsrl.vi v8, v8, 24
+; RV64I-NEXT: ret
+;
+; CHECK-F-LABEL: cttz_zero_undef_nxv16i32:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-F-NEXT: vrsub.vi v16, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v16
+; CHECK-F-NEXT: vmset.m v0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v8, 23
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vsub.vx v8, v8, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv16i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-D-NEXT: vrsub.vi v16, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v16
+; CHECK-D-NEXT: vmset.m v0
+; CHECK-D-NEXT: fsrmi a0, 1
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-D-NEXT: vsrl.vi v8, v8, 23
+; CHECK-D-NEXT: li a1, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a1
+; CHECK-D-NEXT: fsrm a0
+; CHECK-D-NEXT: ret
  %a = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true)
  ret <vscale x 16 x i32> %a
}
define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsrl.vi v11, v8, 1
-; RV32-NEXT: vand.vv v10, v11, v10
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: vand.vv v10, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsrl.vi v11, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v11
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vmul.vv v8, v8, v10
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32I-LABEL: cttz_zero_undef_nxv1i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32I-NEXT: vsub.vx v9, v8, a0 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: vsrl.vi v11, v8, 1 +; RV32I-NEXT: vand.vv v10, v11, v10 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: vand.vv v10, v8, v9 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: vsrl.vi v11, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v11 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: cttz_zero_undef_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: lui a0, %hi(.LCPI40_0) -; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_1) -; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI40_2) -; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_3) -; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: cttz_zero_undef_nxv1i64: +; RV64I: # %bb.0: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV64I-NEXT: vsub.vx v9, v8, a0 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v9 +; RV64I-NEXT: lui a0, %hi(.LCPI40_0) +; RV64I-NEXT: ld a0, %lo(.LCPI40_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI40_1) +; RV64I-NEXT: ld a1, %lo(.LCPI40_1)(a1) +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: vand.vx v9, v9, a0 +; RV64I-NEXT: vsub.vv v8, v8, v9 +; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v9, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI40_2) +; RV64I-NEXT: ld a0, %lo(.LCPI40_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI40_3) +; RV64I-NEXT: ld a1, %lo(.LCPI40_3)(a1) +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: vand.vx v8, v8, a0 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a0, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a0 +; RV64I-NEXT: ret +; +; CHECK-F-LABEL: cttz_zero_undef_nxv1i64: +; CHECK-F: # %bb.0: +; CHECK-F-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-F-NEXT: vrsub.vi v9, v8, 0 +; CHECK-F-NEXT: vand.vv v8, v8, v9 +; CHECK-F-NEXT: vmset.m v0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; 
CHECK-F-NEXT: vsrl.vi v8, v9, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v9, v8
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vsub.vx v8, v9, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i64:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vmset.m v0
+; CHECK-D-NEXT: fsrmi a0, 1
+; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-D-NEXT: vfncvt.f.xu.w v9, v8, v0.t
+; CHECK-D-NEXT: vsrl.vi v8, v9, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-D-NEXT: vzext.vf2 v9, v8
+; CHECK-D-NEXT: li a1, 127
+; CHECK-D-NEXT: vsub.vx v8, v9, a1
+; CHECK-D-NEXT: fsrm a0
+; CHECK-D-NEXT: ret
  %a = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true)
  ret <vscale x 1 x i64> %a
}
define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsrl.vi v14, v8, 1
-; RV32-NEXT: vand.vv v12, v14, v12
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: vand.vv v12, v8, v10
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vsrl.vi v14, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v14
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32I-LABEL: cttz_zero_undef_nxv2i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32I-NEXT: vsub.vx v10, v8, a0
+; RV32I-NEXT: vnot.v v8, v8
+; RV32I-NEXT: addi a0, sp, 8
+; RV32I-NEXT: vlse64.v v12, (a0), zero
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: vlse64.v v10, (a0), zero
+; RV32I-NEXT: vsrl.vi v14, v8, 1
+; RV32I-NEXT: vand.vv v12, v14, v12
+; RV32I-NEXT: vsub.vv v8, v8, v12
+; RV32I-NEXT: vand.vv v12, v8, v10
+; RV32I-NEXT: vsrl.vi v8, v8, 2
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: vadd.vv v8, v12, v8
+; RV32I-NEXT: vlse64.v v10, (a0), zero
+; RV32I-NEXT: vlse64.v v12, (a0), zero
+; RV32I-NEXT: vsrl.vi v14, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v14
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: vmul.vv v8, v8, v12
+; RV32I-NEXT: li a0, 56
+; RV32I-NEXT: vsrl.vx v8, v8, a0
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
;
-; RV64-LABEL: cttz_zero_undef_nxv2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a0
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: lui a0, %hi(.LCPI41_0)
-; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI41_1)
-; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1)
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: vand.vx v10, v8, a1
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: lui a0, %hi(.LCPI41_2)
-; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI41_3)
-; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1)
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; RV64I-LABEL: cttz_zero_undef_nxv2i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV64I-NEXT: vsub.vx v10, v8, a0
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: vand.vv v8, v8, v10
+; RV64I-NEXT: lui a0, %hi(.LCPI41_0)
+; RV64I-NEXT: ld a0, %lo(.LCPI41_0)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI41_1)
+; RV64I-NEXT: ld a1, %lo(.LCPI41_1)(a1)
+; RV64I-NEXT: vsrl.vi v10, v8, 1
+; RV64I-NEXT: vand.vx v10, v10, a0
+; RV64I-NEXT: vsub.vv v8, v8, v10
+; RV64I-NEXT: vand.vx v10, v8, a1
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a1
+; RV64I-NEXT: vadd.vv v8, v10, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI41_2)
+; RV64I-NEXT: ld a0, %lo(.LCPI41_2)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI41_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI41_3)(a1)
+; RV64I-NEXT: vsrl.vi v10, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v10
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; CHECK-F-LABEL: cttz_zero_undef_nxv2i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-F-NEXT: vrsub.vi v10, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v10
+; CHECK-F-NEXT: vmset.m v0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v10, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v10, v8
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vsub.vx v8, v10, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv2i64:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-D-NEXT: vrsub.vi v10, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v10
+; CHECK-D-NEXT: vmset.m v0
+; CHECK-D-NEXT: fsrmi a0, 1
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-D-NEXT: vfncvt.f.xu.w v10, v8, v0.t
+; CHECK-D-NEXT: vsrl.vi v8, v10, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-D-NEXT: vzext.vf2 v10, v8
+; CHECK-D-NEXT: li a1, 127
+; CHECK-D-NEXT: vsub.vx v8, v10, a1
+; CHECK-D-NEXT: fsrm a0
+; CHECK-D-NEXT: ret
  %a = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true)
  ret <vscale x 2 x i64> %a
}
define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp,
-16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a0 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 1 -; RV32-NEXT: vand.vv v16, v20, v16 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v20 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: cttz_zero_undef_nxv4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32I-NEXT: vsub.vx v12, v8, a0 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 1 +; RV32I-NEXT: vand.vv v16, v20, v16 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: vand.vv v16, v8, v12 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vsrl.vi v20, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v20 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vmul.vv v8, v8, v16 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: cttz_zero_undef_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: lui a0, %hi(.LCPI42_0) -; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_1) -; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI42_2) -; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_3) -; RV64-NEXT: ld a1, 
%lo(.LCPI42_3)(a1)
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; RV64I-LABEL: cttz_zero_undef_nxv4i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV64I-NEXT: vsub.vx v12, v8, a0
+; RV64I-NEXT: vnot.v v8, v8
+; RV64I-NEXT: vand.vv v8, v8, v12
+; RV64I-NEXT: lui a0, %hi(.LCPI42_0)
+; RV64I-NEXT: ld a0, %lo(.LCPI42_0)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI42_1)
+; RV64I-NEXT: ld a1, %lo(.LCPI42_1)(a1)
+; RV64I-NEXT: vsrl.vi v12, v8, 1
+; RV64I-NEXT: vand.vx v12, v12, a0
+; RV64I-NEXT: vsub.vv v8, v8, v12
+; RV64I-NEXT: vand.vx v12, v8, a1
+; RV64I-NEXT: vsrl.vi v8, v8, 2
+; RV64I-NEXT: vand.vx v8, v8, a1
+; RV64I-NEXT: vadd.vv v8, v12, v8
+; RV64I-NEXT: lui a0, %hi(.LCPI42_2)
+; RV64I-NEXT: ld a0, %lo(.LCPI42_2)(a0)
+; RV64I-NEXT: lui a1, %hi(.LCPI42_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI42_3)(a1)
+; RV64I-NEXT: vsrl.vi v12, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v12
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; CHECK-F-LABEL: cttz_zero_undef_nxv4i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-F-NEXT: vrsub.vi v12, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v12
+; CHECK-F-NEXT: vmset.m v0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v12, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v12, v8
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vsub.vx v8, v12, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv4i64:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-D-NEXT: vrsub.vi v12, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v12
+; CHECK-D-NEXT: vmset.m v0
+; CHECK-D-NEXT: fsrmi a0, 1
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-D-NEXT: vfncvt.f.xu.w v12, v8, v0.t
+; CHECK-D-NEXT: vsrl.vi v8, v12, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-D-NEXT: vzext.vf2 v12, v8
+; CHECK-D-NEXT: li a1, 127
+; CHECK-D-NEXT: vsub.vx v8, v12, a1
+; CHECK-D-NEXT: fsrm a0
+; CHECK-D-NEXT: ret
  %a = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true)
  ret <vscale x 4 x i64> %a
}
define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsrl.vi v0, v8, 1
-; RV32-NEXT: vand.vv v24, v0, v24
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-;
RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32I-LABEL: cttz_zero_undef_nxv8i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32I-NEXT: vsub.vx v16, v8, a0 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: vlse64.v v24, (a0), zero +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vsrl.vi v0, v8, 1 +; RV32I-NEXT: vand.vv v24, v0, v24 +; RV32I-NEXT: vsub.vv v8, v8, v24 +; RV32I-NEXT: vand.vv v24, v8, v16 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vadd.vv v8, v24, v8 +; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: vlse64.v v24, (a0), zero +; RV32I-NEXT: vsrl.vi v0, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v0 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vmul.vv v8, v8, v24 +; RV32I-NEXT: li a0, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret ; -; RV64-LABEL: cttz_zero_undef_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI43_0) -; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_1) -; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI43_2) -; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_3) -; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; RV64I-LABEL: cttz_zero_undef_nxv8i64: +; RV64I: # %bb.0: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64I-NEXT: vsub.vx v16, v8, a0 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v16 +; RV64I-NEXT: lui a0, %hi(.LCPI43_0) +; RV64I-NEXT: ld a0, %lo(.LCPI43_0)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI43_1) +; RV64I-NEXT: ld a1, %lo(.LCPI43_1)(a1) +; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: vand.vx v16, v16, a0 +; RV64I-NEXT: vsub.vv v8, v8, v16 +; RV64I-NEXT: vand.vx v16, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v16, v8 +; RV64I-NEXT: lui a0, %hi(.LCPI43_2) +; RV64I-NEXT: ld a0, %lo(.LCPI43_2)(a0) +; RV64I-NEXT: lui a1, %hi(.LCPI43_3) +; RV64I-NEXT: ld a1, 
%lo(.LCPI43_3)(a1)
+; RV64I-NEXT: vsrl.vi v16, v8, 4
+; RV64I-NEXT: vadd.vv v8, v8, v16
+; RV64I-NEXT: vand.vx v8, v8, a0
+; RV64I-NEXT: vmul.vx v8, v8, a1
+; RV64I-NEXT: li a0, 56
+; RV64I-NEXT: vsrl.vx v8, v8, a0
+; RV64I-NEXT: ret
+;
+; CHECK-F-LABEL: cttz_zero_undef_nxv8i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-F-NEXT: vrsub.vi v16, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v16
+; CHECK-F-NEXT: vmset.m v0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v16, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v16, v8
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vsub.vx v8, v16, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv8i64:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-D-NEXT: vrsub.vi v16, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v16
+; CHECK-D-NEXT: vmset.m v0
+; CHECK-D-NEXT: fsrmi a0, 1
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-D-NEXT: vfncvt.f.xu.w v16, v8, v0.t
+; CHECK-D-NEXT: vsrl.vi v8, v16, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-D-NEXT: vzext.vf2 v16, v8
+; CHECK-D-NEXT: li a1, 127
+; CHECK-D-NEXT: vsub.vx v8, v16, a1
+; CHECK-D-NEXT: fsrm a0
+; CHECK-D-NEXT: ret
  %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
  ret <vscale x 8 x i64> %a
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
@@ -202,6 +204,34 @@
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
;
+; LMULMAX2-RV32F-LABEL: ctlz_v8i16:
+;
LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0) +; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v8 +; LMULMAX2-RV32F-NEXT: vnsrl.wi v9, v10, 23 +; LMULMAX2-RV32F-NEXT: li a1, 142 +; LMULMAX2-RV32F-NEXT: vrsub.vx v9, v9, a1 +; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV32F-NEXT: li a1, 16 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0) +; LMULMAX2-RV32F-NEXT: ret +; +; LMULMAX2-RV64F-LABEL: ctlz_v8i16: +; LMULMAX2-RV64F: # %bb.0: +; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0) +; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v8 +; LMULMAX2-RV64F-NEXT: vnsrl.wi v9, v10, 23 +; LMULMAX2-RV64F-NEXT: li a1, 142 +; LMULMAX2-RV64F-NEXT: vrsub.vx v9, v9, a1 +; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV64F-NEXT: li a1, 16 +; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0) +; LMULMAX2-RV64F-NEXT: ret +; ; LMULMAX2-RV32D-LABEL: ctlz_v8i16: ; LMULMAX2-RV32D: # %bb.0: ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -328,81 +358,39 @@ ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: ctlz_v4i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 349525 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 209715 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 61681 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: lui a1, 4112 -; LMULMAX1-RV32-NEXT: addi a1, a1, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret +; LMULMAX2-RV32F-LABEL: ctlz_v4i32: +; LMULMAX2-RV32F: # %bb.0: +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV32F-NEXT: vmset.m v0 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v9, v8, v0.t +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX2-RV32F-NEXT: li a1, 158 +; LMULMAX2-RV32F-NEXT: vrsub.vx v9, v9, a1 +; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV32F-NEXT: li a1, 32 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV32F-NEXT: ret ; -; LMULMAX1-RV64-LABEL: ctlz_v4i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; 
LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 349525 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 209715 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 61681 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX2-RV64F-LABEL: ctlz_v4i32: +; LMULMAX2-RV64F: # %bb.0: +; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV64F-NEXT: vmset.m v0 +; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v9, v8, v0.t +; LMULMAX2-RV64F-NEXT: fsrm a1 +; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX2-RV64F-NEXT: li a1, 158 +; LMULMAX2-RV64F-NEXT: vrsub.vx v9, v9, a1 +; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV64F-NEXT: li a1, 32 +; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV64F-NEXT: ret ; ; LMULMAX2-RV32D-LABEL: ctlz_v4i32: ; LMULMAX2-RV32D: # %bb.0: @@ -457,300 +445,228 @@ declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32-LABEL: ctlz_v2i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: li a1, 32 -; LMULMAX2-RV32-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v9, -1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vxor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32-NEXT: 
vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret +; LMULMAX2-RV32I-LABEL: ctlz_v2i64: +; LMULMAX2-RV32I: # %bb.0: +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 8 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 16 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: li a1, 32 +; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.i v9, -1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX2-RV32I-NEXT: lui a1, 349525 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10 +; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: lui a1, 209715 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: lui a1, 61681 +; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: lui a1, 4112 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: li a1, 56 +; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: ret ; -; LMULMAX2-RV64-LABEL: ctlz_v2i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 
-; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: li a1, 32 -; LMULMAX2-RV64-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-RV64I-LABEL: ctlz_v2i64: +; LMULMAX2-RV64I: # %bb.0: +; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 2 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 8 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 16 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: li a1, 32 +; LMULMAX2-RV64I-NEXT: vsrl.vx v9, v8, a1 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_1)(a2) +; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 +; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a2 +; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 +; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_3)(a2) +; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: li a1, 56 +; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: ctlz_v2i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, 
v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: li a1, 32 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v9, -1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 349525 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 209715 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 61681 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 4112 -; LMULMAX1-RV32-NEXT: addi a1, a1, 257 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: li a1, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret +; LMULMAX2-RV32F-LABEL: ctlz_v2i64: +; LMULMAX2-RV32F: # %bb.0: +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: vmset.m v0 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v9 +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vmv.v.i v9, 0 +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32F-NEXT: li a1, 64 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: ret ; -; LMULMAX1-RV64-LABEL: ctlz_v2i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; 
LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: li a1, 32 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX2-RV64F-LABEL: ctlz_v2i64: +; LMULMAX2-RV64F: # %bb.0: +; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64F-NEXT: vmset.m v0 +; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; LMULMAX2-RV64F-NEXT: fsrm a1 +; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV64F-NEXT: vwsubu.vv v11, v10, v9 +; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV64F-NEXT: li a1, 64 +; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v11, a1, v0 +; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV64F-NEXT: ret +; +; LMULMAX2-RV32D-LABEL: ctlz_v2i64: +; LMULMAX2-RV32D: # %bb.0: +; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32D-NEXT: vmset.m v0 +; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32D-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; LMULMAX2-RV32D-NEXT: fsrm a1 +; LMULMAX2-RV32D-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX2-RV32D-NEXT: li a1, 190 +; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; LMULMAX2-RV32D-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; LMULMAX2-RV32D-NEXT: vwsubu.wv v10, v10, v9 +; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32D-NEXT: vmv.v.i v9, 0 +; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32D-NEXT: li a1, 64 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV32D-NEXT: ret +; +; LMULMAX2-RV64D-LABEL: ctlz_v2i64: +; LMULMAX2-RV64D: # %bb.0: +; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64D-NEXT: vmset.m v0 +; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 +; LMULMAX2-RV64D-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; LMULMAX2-RV64D-NEXT: fsrm a1 +; LMULMAX2-RV64D-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX2-RV64D-NEXT: li a1, 190 +; LMULMAX2-RV64D-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV64D-NEXT: vwsubu.vv v11, v10, v9 +; LMULMAX2-RV64D-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; 
LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV64D-NEXT: li a1, 64 +; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v11, a1, v0 +; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV64D-NEXT: ret ; ; LMULMAX8-RV32-LABEL: ctlz_v2i64: ; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: li a1, 32 -; LMULMAX8-RV32-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v9, -1 -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vxor.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV32-NEXT: lui a1, 349525 -; LMULMAX8-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX8-RV32-NEXT: vmset.m v0 +; LMULMAX8-RV32-NEXT: fsrmi a1, 1 +; LMULMAX8-RV32-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; LMULMAX8-RV32-NEXT: fsrm a1 +; LMULMAX8-RV32-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX8-RV32-NEXT: li a1, 190 +; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX8-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: lui a1, 209715 -; LMULMAX8-RV32-NEXT: addi a1, a1, 819 -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vand.vv v10, v8, v9 -; LMULMAX8-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: lui a1, 61681 -; LMULMAX8-RV32-NEXT: addi a1, a1, -241 +; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; LMULMAX8-RV32-NEXT: vwsubu.wv v10, v10, v9 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.x v9, a1 +; LMULMAX8-RV32-NEXT: vmv.v.i v9, 0 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: lui a1, 4112 -; LMULMAX8-RV32-NEXT: addi a1, a1, 257 -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmul.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: li a1, 56 -; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX8-RV32-NEXT: li a1, 64 +; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV32-NEXT: ret ; ; LMULMAX8-RV64-LABEL: ctlz_v2i64: ; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX8-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 2 -; 
LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: li a1, 32 -; LMULMAX8-RV64-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: vnot.v v8, v8 -; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_1) -; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) -; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a2 -; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX8-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) -; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_3) -; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) -; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX8-RV64-NEXT: li a1, 56 -; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmset.m v0 +; LMULMAX8-RV64-NEXT: fsrmi a1, 1 +; LMULMAX8-RV64-NEXT: vfncvt.f.xu.w v9, v8, v0.t +; LMULMAX8-RV64-NEXT: fsrm a1 +; LMULMAX8-RV64-NEXT: vsrl.vi v9, v9, 23 +; LMULMAX8-RV64-NEXT: li a1, 190 +; LMULMAX8-RV64-NEXT: vmv.v.x v10, a1 +; LMULMAX8-RV64-NEXT: vwsubu.vv v11, v10, v9 +; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-RV64-NEXT: li a1, 64 +; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v11, a1, v0 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV64-NEXT: ret %a = load <2 x i64>, ptr %x @@ -1071,209 +987,149 @@ declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) define void @ctlz_v8i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32-LABEL: ctlz_v8i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; 
LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctlz_v8i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-RV32I-LABEL: ctlz_v8i32: +; LMULMAX2-RV32I: # %bb.0: +; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 8 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 16 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV32I-NEXT: lui a1, 349525 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 +; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: lui a1, 209715 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 +; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: lui a1, 61681 +; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 +; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV32I-NEXT: lui a1, 4112 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 +; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 +; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV32I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: ctlz_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: 
vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a2, 349525 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a3, 209715 -; LMULMAX1-RV32-NEXT: addi a3, a3, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a4, 61681 -; LMULMAX1-RV32-NEXT: addi a4, a4, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: lui a5, 4112 -; LMULMAX1-RV32-NEXT: addi a5, a5, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 16 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vnot.v v9, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret +; LMULMAX2-RV64I-LABEL: ctlz_v8i32: +; LMULMAX2-RV64I: # %bb.0: +; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 8 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 16 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64I-NEXT: lui a1, 349525 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: lui a1, 209715 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: lui a1, 61681 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64I-NEXT: lui a1, 4112 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 +; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV64-LABEL: ctlz_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 349525 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 209715 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 61681 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: lui a5, 4112 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 16 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vnot.v v9, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24 -; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX2-RV32F-LABEL: ctlz_v8i32: +; LMULMAX2-RV32F: # %bb.0: +; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV32F-NEXT: vmset.m v0 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v10, v8, v0.t +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23 +; LMULMAX2-RV32F-NEXT: li a1, 158 +; LMULMAX2-RV32F-NEXT: vrsub.vx v10, v10, a1 +; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV32F-NEXT: li a1, 32 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV32F-NEXT: ret +; +; LMULMAX2-RV64F-LABEL: ctlz_v8i32: +; LMULMAX2-RV64F: # %bb.0: +; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV64F-NEXT: vmset.m v0 +; 
LMULMAX2-RV64F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v10, v8, v0.t +; LMULMAX2-RV64F-NEXT: fsrm a1 +; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23 +; LMULMAX2-RV64F-NEXT: li a1, 158 +; LMULMAX2-RV64F-NEXT: vrsub.vx v10, v10, a1 +; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV64F-NEXT: li a1, 32 +; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV64F-NEXT: ret +; +; LMULMAX2-RV32D-LABEL: ctlz_v8i32: +; LMULMAX2-RV32D: # %bb.0: +; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV32D-NEXT: vmset.m v0 +; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v8, v0.t +; LMULMAX2-RV32D-NEXT: fsrm a1 +; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23 +; LMULMAX2-RV32D-NEXT: li a1, 158 +; LMULMAX2-RV32D-NEXT: vrsub.vx v10, v10, a1 +; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV32D-NEXT: li a1, 32 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV32D-NEXT: ret +; +; LMULMAX2-RV64D-LABEL: ctlz_v8i32: +; LMULMAX2-RV64D: # %bb.0: +; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) +; LMULMAX2-RV64D-NEXT: vmset.m v0 +; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 +; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v8, v0.t +; LMULMAX2-RV64D-NEXT: fsrm a1 +; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23 +; LMULMAX2-RV64D-NEXT: li a1, 158 +; LMULMAX2-RV64D-NEXT: vrsub.vx v10, v10, a1 +; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX2-RV64D-NEXT: li a1, 32 +; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV64D-NEXT: ret ; ; LMULMAX8-LABEL: ctlz_v8i32: ; LMULMAX8: # %bb.0: @@ -1298,356 +1154,228 @@ declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32-LABEL: ctlz_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: li a1, 32 -; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vadd.vv 
v8, v12, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret +; LMULMAX2-RV32I-LABEL: ctlz_v4i64: +; LMULMAX2-RV32I: # %bb.0: +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 8 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 16 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: li a1, 32 +; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1 +; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.i v10, -1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV32I-NEXT: lui a1, 349525 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12 +; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: lui a1, 209715 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: lui a1, 61681 +; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: lui a1, 4112 +; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: li a1, 56 +; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: ret ; -; LMULMAX2-RV64-LABEL: ctlz_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, 
v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: li a1, 32 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-RV64I-LABEL: ctlz_v4i64: +; LMULMAX2-RV64I: # %bb.0: +; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 8 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 16 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: li a1, 32 +; LMULMAX2-RV64I-NEXT: vsrl.vx v10, v8, a1 +; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a2 +; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_3)(a2) +; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: li a1, 56 +; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: ctlz_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi 
v10, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: li a2, 32 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 1 -; LMULMAX1-RV32-NEXT: lui a3, 349525 -; LMULMAX1-RV32-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v12, a3 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: lui a3, 209715 -; LMULMAX1-RV32-NEXT: addi a3, a3, 819 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a3 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v13, v8, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v13, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: lui a3, 61681 -; LMULMAX1-RV32-NEXT: addi a3, a3, -241 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v13, a3 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: lui a3, 4112 -; LMULMAX1-RV32-NEXT: addi a3, a3, 257 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v14, a3 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v14 -; LMULMAX1-RV32-NEXT: li a3, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v9, 1 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v9, 16 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 -; LMULMAX1-RV32-NEXT: vsrl.vx v15, v9, a2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 -; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v12 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v10, v9, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13 -; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v14 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret +; LMULMAX2-RV32F-LABEL: ctlz_v4i64: +; LMULMAX2-RV32F: # %bb.0: +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: vmset.m v0 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8, v0.t +; 
LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23 +; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vwsubu.wv v12, v12, v10 +; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32F-NEXT: li a1, 64 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v12, a1, v0 +; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: ret ; -; LMULMAX1-RV64-LABEL: ctlz_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: li a2, 32 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) -; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_0)(a3) -; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_1) -; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_1)(a4) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) -; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) -; LMULMAX1-RV64-NEXT: lui a6, %hi(.LCPI7_3) -; LMULMAX1-RV64-NEXT: ld a6, %lo(.LCPI7_3)(a6) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 -; LMULMAX1-RV64-NEXT: li a7, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 16 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vnot.v v9, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) -; 
LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: ctlz_v4i64:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 190
+; LMULMAX2-RV64F-NEXT: vmv.v.x v11, a1
+; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v11, v10
+; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 64
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v12, a1, v0
+; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
+; LMULMAX2-RV32D-LABEL: ctlz_v4i64:
+; LMULMAX2-RV32D: # %bb.0:
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: vmset.m v0
+; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32D-NEXT: vfncvt.f.xu.w v10, v8, v0.t
+; LMULMAX2-RV32D-NEXT: fsrm a1
+; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32D-NEXT: li a1, 190
+; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX2-RV32D-NEXT: vwsubu.wv v12, v12, v10
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10
+; LMULMAX2-RV32D-NEXT: li a1, 64
+; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v12, a1, v0
+; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: ret
+;
+; LMULMAX2-RV64D-LABEL: ctlz_v4i64:
+; LMULMAX2-RV64D: # %bb.0:
+; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: vmset.m v0
+; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64D-NEXT: vfncvt.f.xu.w v10, v8, v0.t
+; LMULMAX2-RV64D-NEXT: fsrm a1
+; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64D-NEXT: li a1, 190
+; LMULMAX2-RV64D-NEXT: vmv.v.x v11, a1
+; LMULMAX2-RV64D-NEXT: vwsubu.vv v12, v11, v10
+; LMULMAX2-RV64D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: li a1, 64
+; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v12, a1, v0
+; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: ret
 ;
 ; LMULMAX8-RV32-LABEL: ctlz_v4i64:
 ; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 16
-; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: li a1, 32
-; LMULMAX8-RV32-NEXT: vsrl.vx v10, v8, a1
-; LMULMAX8-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v10, -1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vxor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX8-RV32-NEXT: lui a1, 349525
-; LMULMAX8-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX8-RV32-NEXT: vmset.m v0
+; LMULMAX8-RV32-NEXT: fsrmi a1, 1
+; LMULMAX8-RV32-NEXT: vfncvt.f.xu.w v10, v8, v0.t
+; LMULMAX8-RV32-NEXT: fsrm a1
+; LMULMAX8-RV32-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX8-RV32-NEXT: li a1, 190
+; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; LMULMAX8-RV32-NEXT: vmv.v.x v12, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vand.vv v10, v10, v12
-; LMULMAX8-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: lui a1, 209715
-; LMULMAX8-RV32-NEXT: addi a1, a1, 819
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vand.vv v12, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vadd.vv v8, v12, v8
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: lui a1, 61681
-; LMULMAX8-RV32-NEXT: addi a1, a1, -241
+; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX8-RV32-NEXT: vwsubu.wv v12, v12, v10
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: lui a1, 4112
-; LMULMAX8-RV32-NEXT: addi a1, a1, 257
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmul.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: li a1, 56
-; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10
+; LMULMAX8-RV32-NEXT: li a1, 64
+; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v12, a1, v0
 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
 ; LMULMAX8-RV32-NEXT: ret
 ;
 ; LMULMAX8-RV64-LABEL: ctlz_v4i64:
 ; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; LMULMAX8-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 16
-; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: li a1, 32
-; LMULMAX8-RV64-NEXT: vsrl.vx v10, v8, a1
-; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vnot.v v8, v8
-; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_0)
-; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1)
-; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_1)
-; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2)
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX8-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a2
-; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2
-; LMULMAX8-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_2)
-; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1)
-; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_3)
-; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2)
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2
-; LMULMAX8-RV64-NEXT: li a1, 56
-; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-RV64-NEXT: vmset.m v0
+; LMULMAX8-RV64-NEXT: fsrmi a1, 1
+; LMULMAX8-RV64-NEXT: vfncvt.f.xu.w v10, v8, v0.t
+; LMULMAX8-RV64-NEXT: fsrm a1
+; LMULMAX8-RV64-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX8-RV64-NEXT: li a1, 190
+; LMULMAX8-RV64-NEXT: vmv.v.x v11, a1
+; LMULMAX8-RV64-NEXT: vwsubu.vv v12, v11, v10
+; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX8-RV64-NEXT: li a1, 64
+; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v12, a1, v0
 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
 ; LMULMAX8-RV64-NEXT: ret
 %a = load <4 x i64>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -3,6 +3,8 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+f -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
@@ -182,6 +184,38 @@
 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
 ; LMULMAX1-RV64-NEXT: ret
 ;
+; LMULMAX2-RV32F-LABEL: cttz_v8i16:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v9
+; LMULMAX2-RV32F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 16
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
+;
+; LMULMAX2-RV64F-LABEL: cttz_v8i16:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v9
+; LMULMAX2-RV64F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 16
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
 ; LMULMAX2-RV32D-LABEL: cttz_v8i16:
 ; LMULMAX2-RV32D: # %bb.0:
 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -300,67 +334,43 @@
 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
 ; LMULMAX2-RV64I-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: cttz_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: li a1, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 349525
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 209715
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 61681
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: lui a1, 4112
-; LMULMAX1-RV32-NEXT: addi a1, a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: cttz_v4i32:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v9, v9, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 32
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
 ;
-; LMULMAX1-RV64-LABEL: cttz_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: li a1, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 349525
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 209715
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 61681
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: lui a1, 4112
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: cttz_v4i32:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v9, v9, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 32
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
 ;
 ; LMULMAX2-RV32D-LABEL: cttz_v4i32:
 ; LMULMAX2-RV32D: # %bb.0:
@@ -421,208 +431,202 @@
 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
 define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v2i64:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: li a1, 1
-; LMULMAX2-RV32-NEXT: vsub.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.i v10, -1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vxor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v9
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: li a1, 56
-; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
+; LMULMAX2-RV32I-LABEL: cttz_v2i64:
+; LMULMAX2-RV32I: # %bb.0:
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.i v10, -1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: li a1, 56
+; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: ret
 ;
-; LMULMAX2-RV64-LABEL: cttz_v2i64:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT: li a1, 1
-; LMULMAX2-RV64-NEXT: vsub.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0)
-; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1)
-; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1)
-; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2)
-; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2)
-; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1)
-; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3)
-; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2)
-; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT: li a1, 56
-; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-RV64I-LABEL: cttz_v2i64:
+; LMULMAX2-RV64I: # %bb.0:
+; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: li a1, 1
+; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1
+; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_0)
+; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_0)(a1)
+; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_1)
+; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_1)(a2)
+; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
+; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
+; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a2
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8
+; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_2)
+; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_2)(a1)
+; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_3)
+; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_3)(a2)
+; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2
+; LMULMAX2-RV64I-NEXT: li a1, 56
+; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: cttz_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT: li a1, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 349525
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 209715
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 61681
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 4112
-; LMULMAX1-RV32-NEXT: addi a1, a1, 257
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: li a1, 56
-; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: cttz_v2i64:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vle64.v v9, (a0)
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vmseq.vv v8, v9, v10
+; LMULMAX2-RV32F-NEXT: vsub.vv v10, v10, v9
+; LMULMAX2-RV32F-NEXT: vand.vv v9, v9, v10
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v9, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v10, 23
+; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v9
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v9, v10, a1
+; LMULMAX2-RV32F-NEXT: li a1, 64
+; LMULMAX2-RV32F-NEXT: vmv.v.v v0, v8
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
 ;
-; LMULMAX1-RV64-LABEL: cttz_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: li a1, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0)
-; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1)
-; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2)
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2)
-; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3)
-; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2)
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2
-; LMULMAX1-RV64-NEXT: li a1, 56
-; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: cttz_v2i64:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v9, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vwsubu.vx v10, v9, a1
+; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 64
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
+; LMULMAX2-RV32D-LABEL: cttz_v2i64:
+; LMULMAX2-RV32D: # %bb.0:
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32D-NEXT: vle64.v v9, (a0)
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32D-NEXT: vmseq.vv v8, v9, v10
+; LMULMAX2-RV32D-NEXT: vsub.vv v10, v10, v9
+; LMULMAX2-RV32D-NEXT: vand.vv v9, v9, v10
+; LMULMAX2-RV32D-NEXT: vmset.m v0
+; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; LMULMAX2-RV32D-NEXT: vfncvt.f.xu.w v10, v9, v0.t
+; LMULMAX2-RV32D-NEXT: fsrm a1
+; LMULMAX2-RV32D-NEXT: vsrl.vi v9, v10, 23
+; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX2-RV32D-NEXT: vzext.vf2 v10, v9
+; LMULMAX2-RV32D-NEXT: li a1, 127
+; LMULMAX2-RV32D-NEXT: vsub.vx v9, v10, a1
+; LMULMAX2-RV32D-NEXT: li a1, 64
+; LMULMAX2-RV32D-NEXT: vmv.v.v v0, v8
+; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: ret
+;
+; LMULMAX2-RV64D-LABEL: cttz_v2i64:
+; LMULMAX2-RV64D: # %bb.0:
+; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX2-RV64D-NEXT: vand.vv v9, v8, v9
+; LMULMAX2-RV64D-NEXT: vmset.m v0
+; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; LMULMAX2-RV64D-NEXT: vfncvt.f.xu.w v10, v9, v0.t
+; LMULMAX2-RV64D-NEXT: fsrm a1
+; LMULMAX2-RV64D-NEXT: vsrl.vi v9, v10, 23
+; LMULMAX2-RV64D-NEXT: li a1, 127
+; LMULMAX2-RV64D-NEXT: vwsubu.vx v10, v9, a1
+; LMULMAX2-RV64D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: li a1, 64
+; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: ret
 ;
 ; LMULMAX8-RV32-LABEL: cttz_v2i64:
 ; LMULMAX8-RV32: # %bb.0:
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: li a1, 1
-; LMULMAX8-RV32-NEXT: vsub.vx v9, v8, a1
+; LMULMAX8-RV32-NEXT: vle64.v v9, (a0)
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v10, -1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vxor.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX8-RV32-NEXT: lui a1, 349525
-; LMULMAX8-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX8-RV32-NEXT: vmseq.vv v8, v9, v10
+; LMULMAX8-RV32-NEXT: vsub.vv v10, v10, v9
 ; LMULMAX8-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX8-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX8-RV32-NEXT: lui a1, 209715
-; LMULMAX8-RV32-NEXT: addi a1, a1, 819
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vand.vv v10, v8, v9
-; LMULMAX8-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX8-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX8-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX8-RV32-NEXT: lui a1, 61681
-; LMULMAX8-RV32-NEXT: addi a1, a1, -241
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX8-RV32-NEXT: lui a1, 4112
-; LMULMAX8-RV32-NEXT: addi a1, a1, 257
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v9, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmul.vv v8, v8, v9
-; LMULMAX8-RV32-NEXT: li a1, 56
-; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-RV32-NEXT: vmset.m v0
+; LMULMAX8-RV32-NEXT: fsrmi a1, 1
+; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; LMULMAX8-RV32-NEXT: vfncvt.f.xu.w v10, v9, v0.t
+; LMULMAX8-RV32-NEXT: fsrm a1
+; LMULMAX8-RV32-NEXT: vsrl.vi v9, v10, 23
+; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX8-RV32-NEXT: vzext.vf2 v10, v9
+; LMULMAX8-RV32-NEXT: li a1, 127
+; LMULMAX8-RV32-NEXT: vsub.vx v9, v10, a1
+; LMULMAX8-RV32-NEXT: li a1, 64
+; LMULMAX8-RV32-NEXT: vmv.v.v v0, v8
+; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v9, a1, v0
 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
 ; LMULMAX8-RV32-NEXT: ret
 ;
@@ -630,31 +634,20 @@
 ; LMULMAX8-RV64: # %bb.0:
 ; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX8-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV64-NEXT: li a1, 1
-; LMULMAX8-RV64-NEXT: vsub.vx v9, v8, a1
-; LMULMAX8-RV64-NEXT: vnot.v v8, v8
-; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v9
-; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_0)
-; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1)
-; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_1)
-; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2)
-; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX8-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a2
-; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2
-; LMULMAX8-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_2)
-; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1)
-; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_3)
-; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2)
-; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2
-; LMULMAX8-RV64-NEXT: li a1, 56
-; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-RV64-NEXT: vrsub.vi v9, v8, 0
+; LMULMAX8-RV64-NEXT: vand.vv v9, v8, v9
+; LMULMAX8-RV64-NEXT: vmset.m v0
+; LMULMAX8-RV64-NEXT: fsrmi a1, 1
+; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; LMULMAX8-RV64-NEXT: vfncvt.f.xu.w v10, v9, v0.t
+; LMULMAX8-RV64-NEXT: fsrm a1
+; LMULMAX8-RV64-NEXT: vsrl.vi v9, v10, 23
+; LMULMAX8-RV64-NEXT: li a1, 127
+; LMULMAX8-RV64-NEXT: vwsubu.vx v10, v9, a1
+; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX8-RV64-NEXT: li a1, 64
+; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0
 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
 ; LMULMAX8-RV64-NEXT: ret
 %a = load <2 x i64>, ptr %x
@@ -938,165 +931,143 @@
 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
 define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v8i32:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: li a1, 1
-; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
+; LMULMAX2-RV32I-LABEL: cttz_v8i32:
+; LMULMAX2-RV32I: # %bb.0:
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: ret
 ;
-; LMULMAX2-RV64-LABEL: cttz_v8i32:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: li a1, 1
-; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 349525
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 209715
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 61681
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: lui a1, 4112
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-RV64I-LABEL: cttz_v8i32:
+; LMULMAX2-RV64I: # %bb.0:
+; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: li a1, 1
+; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV64I-NEXT: lui a1, 349525
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: lui a1, 209715
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: lui a1, 61681
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: lui a1, 4112
+; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: cttz_v8i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: li a2, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a3, 349525
-; LMULMAX1-RV32-NEXT: addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 209715
-; LMULMAX1-RV32-NEXT: addi a4, a4, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a5, 61681
-; LMULMAX1-RV32-NEXT: addi a5, a5, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: lui a6, 4112
-; LMULMAX1-RV32-NEXT: addi a6, a6, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT: vnot.v v9, v9
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: cttz_v8i32:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: li a1, 32
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
 ;
-; LMULMAX1-RV64-LABEL: cttz_v8i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: li a2, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a3, 349525
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 209715
-; LMULMAX1-RV64-NEXT: addiw a4, a4, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a5, 61681
-; LMULMAX1-RV64-NEXT: addiw a5, a5, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: lui a6, 4112
-; LMULMAX1-RV64-NEXT: addiw a6, a6, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: cttz_v8i32:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 32
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
+; LMULMAX2-RV32D-LABEL: cttz_v8i32:
+; LMULMAX2-RV32D: # %bb.0:
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV32D-NEXT: vmset.m v0
+; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV32D-NEXT: fsrm a1
+; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32D-NEXT: li a1, 127
+; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32D-NEXT: li a1, 32
+; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: ret
+;
+; LMULMAX2-RV64D-LABEL: cttz_v8i32:
+; LMULMAX2-RV64D: # %bb.0:
+; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV64D-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV64D-NEXT: vmset.m v0
+; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v10, v0.t
+; LMULMAX2-RV64D-NEXT: fsrm a1
+; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64D-NEXT: li a1, 127
+; LMULMAX2-RV64D-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: li a1, 32
+; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: ret
 ;
 ; LMULMAX8-LABEL: cttz_v8i32:
 ; LMULMAX8: # %bb.0:
@@ -1123,244 +1094,202 @@
 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
 define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v4i64:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: li a1, 1
-; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.i v12, -1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vxor.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v12, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: li a1, 56
-; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
+; LMULMAX2-RV32I-LABEL: cttz_v4i64:
+; LMULMAX2-RV32I: # %bb.0:
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: li a1, 1
+; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.i v12, -1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
+; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: li a1, 56
+; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV32I-NEXT: ret
 ;
-; LMULMAX2-RV64-LABEL: cttz_v4i64:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT: li a1, 1
-; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0)
-; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1)
-; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1)
-; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2)
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2)
-; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1)
-; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3)
-; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2)
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT: li a1, 56
-; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-RV64I-LABEL: cttz_v4i64:
+; LMULMAX2-RV64I: # %bb.0:
+; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: li a1, 1
+; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
+; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_0)
+; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1)
+; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_1)
+; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2)
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a2
+; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_2)
+; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_2)(a1)
+; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_3)
+; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_3)(a2)
+; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2
+; LMULMAX2-RV64I-NEXT: li a1, 56
+; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64I-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: cttz_v4i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle64.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle64.v v9, (a0)
-; LMULMAX1-RV32-NEXT: li a2, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v11, -1
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a3, 349525
-; LMULMAX1-RV32-NEXT: addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v12, a3
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v12
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a3, 209715
-; LMULMAX1-RV32-NEXT: addi a3, a3, 819
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v10, a3
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vand.vv v13, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v13, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v13
-; LMULMAX1-RV32-NEXT: lui a3, 61681
-; LMULMAX1-RV32-NEXT: addi a3, a3, -241
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v13, a3
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13
-; LMULMAX1-RV32-NEXT: lui a3, 4112
-; LMULMAX1-RV32-NEXT: addi a3, a3, 257
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v14, a3
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v14
-; LMULMAX1-RV32-NEXT: li a3, 56
-; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT: vsub.vx v15, v9, a2
-; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v15
-; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v12
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v11, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13
-; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v14
-; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse64.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-RV32F-LABEL: cttz_v4i64:
+; LMULMAX2-RV32F: # %bb.0:
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vle64.v v10, (a0)
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vmv.v.i v12, 0
+; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vmseq.vv v8, v10, v12
+; LMULMAX2-RV32F-NEXT: vsub.vv v12, v12, v10
+; LMULMAX2-RV32F-NEXT: vand.vv v10, v10, v12
+; LMULMAX2-RV32F-NEXT: vmset.m v0
+; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v10, v0.t
+; LMULMAX2-RV32F-NEXT: fsrm a1
+; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v9
+; LMULMAX2-RV32F-NEXT: li a1, 127
+; LMULMAX2-RV32F-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV32F-NEXT: li a1, 64
+; LMULMAX2-RV32F-NEXT: vmv1r.v v0, v8
+; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV32F-NEXT: ret
 ;
-; LMULMAX1-RV64-LABEL: cttz_v4i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle64.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: li a2, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0)
-; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_0)(a3)
-; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_1)
-; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_1)(a4)
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2)
-; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5)
-; LMULMAX1-RV64-NEXT: lui a6, %hi(.LCPI7_3)
-; LMULMAX1-RV64-NEXT: ld a6, %lo(.LCPI7_3)(a6)
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT: li a7, 56
-; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7
-; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-RV64F-LABEL: cttz_v4i64:
+; LMULMAX2-RV64F: # %bb.0:
+; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV64F-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV64F-NEXT: vmset.m v0
+; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v12, v10, v0.t
+; LMULMAX2-RV64F-NEXT: fsrm a1
+; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v12, 23
+; LMULMAX2-RV64F-NEXT: li a1, 127
+; LMULMAX2-RV64F-NEXT: vwsubu.vx v12, v10, a1
+; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: li a1, 64
+; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v12, a1, v0
+; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64F-NEXT: ret
+;
+; LMULMAX2-RV32D-LABEL: cttz_v4i64:
+; LMULMAX2-RV32D: # %bb.0:
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vle64.v v10, (a0)
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vmv.v.i v12, 0
+; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vmseq.vv v8, v10, v12
+; LMULMAX2-RV32D-NEXT: vsub.vv v12, v12, v10
+; LMULMAX2-RV32D-NEXT: vand.vv v10, v10, v12
+; LMULMAX2-RV32D-NEXT: vmset.m v0
+; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX2-RV32D-NEXT: vfncvt.f.xu.w v9, v10, v0.t
+; LMULMAX2-RV32D-NEXT: fsrm a1
+; LMULMAX2-RV32D-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV32D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV32D-NEXT: vzext.vf2 v10, v9
+; LMULMAX2-RV32D-NEXT: li a1, 127
+; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1
+; LMULMAX2-RV32D-NEXT: li a1, 64
+; LMULMAX2-RV32D-NEXT: vmv1r.v v0, v8
+; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV32D-NEXT: ret
+;
+; LMULMAX2-RV64D-LABEL: cttz_v4i64:
+; LMULMAX2-RV64D: # %bb.0:
+; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX2-RV64D-NEXT: vand.vv v10, v8, v10
+; LMULMAX2-RV64D-NEXT: vmset.m v0
+; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
+; LMULMAX2-RV64D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX2-RV64D-NEXT: vfncvt.f.xu.w v12, v10, v0.t
+; LMULMAX2-RV64D-NEXT: fsrm a1
+; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v12, 23
+; LMULMAX2-RV64D-NEXT: li a1, 127
+; LMULMAX2-RV64D-NEXT: vwsubu.vx v12, v10, a1
+; LMULMAX2-RV64D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: li a1, 64
+; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v12, a1, v0
+; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
+; LMULMAX2-RV64D-NEXT: ret
 ;
 ; LMULMAX8-RV32-LABEL: cttz_v4i64:
 ; LMULMAX8-RV32: # %bb.0:
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: li a1, 1
-; LMULMAX8-RV32-NEXT: vsub.vx v10, v8, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v12, -1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vxor.vv v8, v8, v12
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX8-RV32-NEXT: lui a1, 349525
-; LMULMAX8-RV32-NEXT: addi a1, a1, 1365
+; LMULMAX8-RV32-NEXT: vle64.v v10, (a0)
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v12, a1
+; LMULMAX8-RV32-NEXT: vmv.v.i v12, 0
 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX8-RV32-NEXT: vmseq.vv v8, v10, v12
+; LMULMAX8-RV32-NEXT: vsub.vv v12, v12, v10
 ; LMULMAX8-RV32-NEXT: vand.vv v10, v10, v12
-; LMULMAX8-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: lui a1, 209715
-; LMULMAX8-RV32-NEXT: addi a1, a1, 819
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vand.vv v12, v8, v10
-; LMULMAX8-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: vadd.vv v8, v12, v8
-; LMULMAX8-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: lui a1, 61681
-; LMULMAX8-RV32-NEXT: addi a1, a1, -241
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: lui a1, 4112
-; LMULMAX8-RV32-NEXT: addi a1, a1, 257
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmul.vv v8, v8, v10
-; LMULMAX8-RV32-NEXT: li a1, 56
-; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-RV32-NEXT: vmset.m v0
+; LMULMAX8-RV32-NEXT: fsrmi a1, 1
+; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX8-RV32-NEXT: vfncvt.f.xu.w v9, v10, v0.t
+; LMULMAX8-RV32-NEXT: fsrm a1
+; LMULMAX8-RV32-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX8-RV32-NEXT: vzext.vf2 v10, v9
+; LMULMAX8-RV32-NEXT: li a1, 127
+; LMULMAX8-RV32-NEXT: vsub.vx v10, v10, a1
+; LMULMAX8-RV32-NEXT: li a1, 64
+; LMULMAX8-RV32-NEXT: vmv1r.v v0, v8
+; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0
 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
 ; LMULMAX8-RV32-NEXT: ret
 ;
@@ -1368,31 +1297,20 @@
 ; LMULMAX8-RV64: # %bb.0:
 ; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX8-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV64-NEXT: li a1, 1
-; LMULMAX8-RV64-NEXT: vsub.vx v10, v8, a1
-; LMULMAX8-RV64-NEXT: vnot.v v8, v8
-; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_0)
-; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1)
-; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_1)
-; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2)
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX8-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a2
-; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2
-; LMULMAX8-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_2)
-; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1)
-; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_3)
-; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2)
-; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2
-; LMULMAX8-RV64-NEXT: li a1, 56
-; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-RV64-NEXT: vrsub.vi v10, v8, 0
+; LMULMAX8-RV64-NEXT: vand.vv v10, v8, v10
+; LMULMAX8-RV64-NEXT: vmset.m v0
+; LMULMAX8-RV64-NEXT: fsrmi a1, 1
+; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; LMULMAX8-RV64-NEXT: vfncvt.f.xu.w v12, v10, v0.t
+; LMULMAX8-RV64-NEXT: fsrm a1
+; LMULMAX8-RV64-NEXT: vsrl.vi v10, v12, 23
+; LMULMAX8-RV64-NEXT: li a1, 127
+; LMULMAX8-RV64-NEXT: vwsubu.vx v12, v10, a1
+; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX8-RV64-NEXT: li a1, 64
+; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v12, a1, v0
 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
 ; LMULMAX8-RV64-NEXT: ret
 %a = load <4 x i64>, ptr %x