diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -758,9 +758,10 @@
       // of f32.
       EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
       if (isTypeLegal(FloatVT)) {
-        setOperationAction(
-            {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
-            Custom);
+        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
+                            ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
+                            ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
+                           VT, Custom);
       }
     }

@@ -4276,6 +4277,16 @@
   unsigned EltSize = VT.getScalarSizeInBits();
   SDValue Src = Op.getOperand(0);
   SDLoc DL(Op);
+  MVT ContainerVT = VT;
+
+  SDValue Mask, VL;
+  if (Op->isVPOpcode()) {
+    Mask = Op.getOperand(1);
+    if (VT.isFixedLengthVector())
+      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+                                     Subtarget);
+    VL = Op.getOperand(2);
+  }

   // We choose FP type that can represent the value if possible. Otherwise, we
   // use rounding to zero conversion for correct exponent of the result.
@@ -4296,21 +4307,26 @@
   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
     SDValue Neg = DAG.getNegative(Src, DL, VT);
     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
+  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
+    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                              Src, Mask, VL);
+    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
   }

   // We have a legal FP type, convert to it.
   SDValue FloatVal;
   if (FloatVT.bitsGT(VT)) {
     FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+    if (Op->isVPOpcode())
+      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
   } else {
     // Use RTZ to avoid rounding influencing exponent of FloatVal.
-    MVT ContainerVT = VT;
     if (VT.isFixedLengthVector()) {
       ContainerVT = getContainerForFixedLengthVector(VT);
       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
     }
-
-    auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+    if (!Op->isVPOpcode())
+      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
     SDValue RTZRM =
         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
     MVT ContainerFloatVT =
@@ -4326,28 +4342,49 @@
   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
   SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
                             DAG.getConstant(ShiftAmt, DL, IntVT));
-  // Restore back to original type. Truncation after SRL is to generate vnsrl.
-  if (IntVT.bitsLT(VT))
-    Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
-  else if (IntVT.bitsGT(VT))
-    Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
-  // The exponent contains log2 of the value in biased form.
+  if (Op->isVPOpcode()) {
+    Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
+                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
+    // Restore back to original type. Truncation after SRL is to generate vnsrl.
+    if (IntVT.bitsLT(VT))
+      Exp = DAG.getNode(ISD::VP_ZERO_EXTEND, DL, VT, Exp, Mask, VL);
+    else if (IntVT.bitsGT(VT))
+      Exp = DAG.getNode(ISD::VP_TRUNCATE, DL, VT, Exp, Mask, VL);
+  } else {
+    // Restore back to original type. Truncation after SRL is to generate vnsrl.
+    if (IntVT.bitsLT(VT))
+      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
+    else if (IntVT.bitsGT(VT))
+      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
+    // The exponent contains log2 of the value in biased form.
+  }
   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
-  // For trailing zeros, we just need to subtract the bias.
   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
     return DAG.getNode(ISD::SUB, DL, VT, Exp,
                        DAG.getConstant(ExponentBias, DL, VT));
+  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
+    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
+                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
+
   // For leading zeros, we need to remove the bias and convert from log2 to
   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
   unsigned Adjust = ExponentBias + (EltSize - 1);
   SDValue Res =
       DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+
+  if (Op->isVPOpcode())
+    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
+                      Mask, VL);
+
   // The above result with zero input equals to Adjust which is greater than
   // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
   if (Op.getOpcode() == ISD::CTLZ)
     Res = DAG.getNode(ISD::UMIN, DL, VT, Res,
                       DAG.getConstant(EltSize, DL, VT));
+  else if (Op.getOpcode() == ISD::VP_CTLZ)
+    Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
+                      DAG.getConstant(EltSize, DL, VT), Mask, VL);
   return Res;
 }
@@ -5305,6 +5342,9 @@
   case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::VP_CTLZ:
+  case ISD::VP_CTLZ_ZERO_UNDEF:
+  case ISD::VP_CTTZ_ZERO_UNDEF:
     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
   case ISD::VSELECT:
     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -9,26 +9,19 @@
 define <vscale x 1 x i8> @vp_ctlz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vnot.v v8, v8, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT:    li a0, 85
-; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    li a0, 51
-; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    li a0, 134
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 8
+; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
@@ -37,26 +30,16 @@
 define <vscale x 1 x i8> @vp_ctlz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_nxv1i8_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vnot.v v8, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    li a0, 85
-; CHECK-NEXT:    vand.vx v9, v9,
a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -69,26 +52,19 @@ define @vp_ctlz_nxv2i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 false, %m, i32 %evl) ret %v @@ -97,26 +73,16 @@ define @vp_ctlz_nxv2i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ 
-129,26 +95,19 @@ define @vp_ctlz_nxv4i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 false, %m, i32 %evl) ret %v @@ -157,26 +116,16 @@ define @vp_ctlz_nxv4i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -189,26 +138,19 @@ define @vp_ctlz_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; 
CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 false, %m, i32 %evl) ret %v @@ -217,26 +159,16 @@ define @vp_ctlz_nxv8i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v10, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -249,26 +181,19 @@ define @vp_ctlz_nxv16i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 false, %m, i32 %evl) ret %v @@ 
-277,26 +202,16 @@ define @vp_ctlz_nxv16i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v10, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v12, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -427,31 +342,216 @@ declare @llvm.vp.ctlz.nxv1i16(, i1 immarg, , i32) define @vp_ctlz_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i16: +; CHECK-LABEL: vp_ctlz_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv1i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv2i16(, i1 immarg, , i32) + +define @vp_ctlz_nxv2i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv2i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 
16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv4i16(, i1 immarg, , i32) + +define @vp_ctlz_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv4i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv8i16(, i1 immarg, , i32) + +define @vp_ctlz_nxv8i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv8i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv16i16(, i1 immarg, , i32) + +define @vp_ctlz_nxv16i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv16i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: 
vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv32i16(, i1 immarg, , i32) + +define @vp_ctlz_nxv32i16( %va, %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_nxv32i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t +; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a0, 5 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t +; RV32-NEXT: vand.vx v16, v16, a0, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: lui a0, 3 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t +; RV32-NEXT: vand.vx v16, v8, a0, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: lui a0, 1 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v8, v8, a0, v0.t @@ -460,31 +560,31 @@ ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_nxv1i16: +; RV64-LABEL: vp_ctlz_nxv32i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: lui a0, 5 ; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vsub.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 3 ; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; 
RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 1 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t @@ -492,36 +592,36 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i16_unmasked: +define @vp_ctlz_nxv32i16_unmasked( %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_nxv32i16_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a0, 5 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a0, 3 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a0 ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: lui a0, 1 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v8, v8, a0 @@ -530,31 +630,31 @@ ; RV32-NEXT: vsrl.vi v8, v8, 8 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_nxv1i16_unmasked: +; RV64-LABEL: vp_ctlz_nxv32i16_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 8 +; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vsrl.vi v16, v8, 1 ; RV64-NEXT: lui a0, 5 ; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 +; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vsub.vv v8, v8, v16 ; RV64-NEXT: lui a0, 3 ; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 
4 +; RV64-NEXT: vadd.vv v8, v8, v16 ; RV64-NEXT: lui a0, 1 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 @@ -562,6328 +662,955 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv1i32(, i1 immarg, , i32) + +define @vp_ctlz_nxv1i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv1i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v9, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv2i16(, i1 immarg, , i32) +declare @llvm.vp.ctlz.nxv2i32(, i1 immarg, , i32) -define @vp_ctlz_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; 
RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_ctlz_nxv2i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; 
CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v10, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv4i16(, i1 immarg, , i32) +declare @llvm.vp.ctlz.nxv4i32(, i1 immarg, , i32) -define @vp_ctlz_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_nxv4i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, %m, i32 %evl) + ret %v } -define 
@vp_ctlz_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_ctlz_nxv4i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v12, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv8i16(, i1 immarg, , i32) +declare @llvm.vp.ctlz.nxv8i32(, i1 immarg, , i32) -define @vp_ctlz_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, 
v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; 
RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_ctlz_nxv8i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v16, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv16i16(, i1 immarg, , i32) +declare @llvm.vp.ctlz.nxv16i32(, i1 immarg, , i32) -define @vp_ctlz_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: 
vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: li a0, 158 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_ctlz_nxv16i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsrl.vi v8, v8, 23 +; CHECK-NEXT: li a0, 158 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.ctlz.nxv16i16( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv32i16(, i1 immarg, , i32) +declare @llvm.vp.ctlz.nxv1i64(, i1 immarg, , i32) -define @vp_ctlz_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: 
vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_nxv1i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv1i32(, i1 immarg, , i32) +declare @llvm.vp.ctlz.nxv2i64(, i1 immarg, , i32) -define @vp_ctlz_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv 
v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_nxv2i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; 
RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_ctlz_nxv2i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv4i64(, i1 immarg, , i32) + +define @vp_ctlz_nxv4i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv4i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv7i64(, i1 immarg, , i32) + +define @vp_ctlz_nxv7i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv7i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv7i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv7i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: 
vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv8i64(, i1 immarg, , i32) + +define @vp_ctlz_nxv8i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv8i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ctlz.nxv16i64(, i1 immarg, , i32) + +define @vp_ctlz_nxv16i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: li a2, 52 +; CHECK-NEXT: vsrl.vx v16, v16, a2, v0.t +; CHECK-NEXT: li a3, 1086 +; CHECK-NEXT: vrsub.vx v16, v16, a3, v0.t +; CHECK-NEXT: li a4, 64 +; CHECK-NEXT: vminu.vx v16, v16, a4, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB46_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB46_2: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t +; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t +; CHECK-NEXT: vminu.vx v8, v8, a4, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_nxv16i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_nxv16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16 +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: li a2, 52 +; CHECK-NEXT: vsrl.vx v16, v16, a2 +; CHECK-NEXT: li a3, 1086 +; CHECK-NEXT: vrsub.vx v16, v16, a3 +; 
CHECK-NEXT: li a4, 64 +; CHECK-NEXT: vminu.vx v16, v16, a4 +; CHECK-NEXT: bltu a0, a1, .LBB47_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB47_2: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsrl.vx v8, v8, a2 +; CHECK-NEXT: vrsub.vx v8, v8, a3 +; CHECK-NEXT: vminu.vx v8, v8, a4 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv1i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv1i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv2i32(, i1 immarg, , i32) -define @vp_ctlz_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t 
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv2i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; 
RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_ctlz_zero_undef_nxv2i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv4i32(, i1 immarg, , i32) -define @vp_ctlz_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; 
RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv4i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_ctlz_zero_undef_nxv4i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, 
ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 true, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv8i32(, i1 immarg, , i32) -define @vp_ctlz_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 
23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_ctlz_zero_undef_nxv8i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v10, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv16i32(, i1 immarg, , i32) -define @vp_ctlz_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli 
zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv16i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; 
RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_ctlz_zero_undef_nxv16i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v12, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv1i64(, i1 immarg, , i32) -define @vp_ctlz_nxv1i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: 
vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv32i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t +; 
CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: li a0, 51 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_nxv1i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 
349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv32i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: li a0, 51 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, %m, i32 %evl) + ret %v } -declare @llvm.vp.ctlz.nxv2i64(, i1 immarg, , i32) -define @vp_ctlz_nxv2i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12, v0.t -; 
RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_nxv2i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; 
RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -declare @llvm.vp.ctlz.nxv4i64(, i1 immarg, , i32) - -define @vp_ctlz_nxv4i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, 
-241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, 
v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_nxv4i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: 
slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -declare @llvm.vp.ctlz.nxv7i64(, i1 immarg, , i32) - -define @vp_ctlz_nxv7i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv7i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv7i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: 
vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_nxv7i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv7i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv7i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; 
RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -declare @llvm.vp.ctlz.nxv8i64(, i1 immarg, , i32) - -define @vp_ctlz_nxv8i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; 
RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_nxv8i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: 
vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv8i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -declare @llvm.vp.ctlz.nxv16i64(, i1 immarg, , i32) - -define @vp_ctlz_nxv16i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a2, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 44(sp) -; RV32-NEXT: sw a2, 40(sp) -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 36(sp) -; RV32-NEXT: sw a2, 32(sp) -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 28(sp) -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw 
a2, 20(sp) -; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: addi a4, sp, 40 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: addi a4, sp, 32 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, 
v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 24 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v24, v16, v0.t -; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB46_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB46_2: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, 
vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a2, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsub.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v8, v16, a4, v0.t -; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: slli a6, a5, 32 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vand.vx v8, v8, a5, v0.t -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: vmul.vx v8, v8, a6, v0.t -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t -; RV64-NEXT: addi t0, sp, 16 -; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; RV64-NEXT: bltu a0, a1, .LBB46_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB46_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a4, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: 
vand.vx v8, v8, a5, v0.t -; RV64-NEXT: vmul.vx v8, v8, a6, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_nxv16i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 16 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v24, v16, a2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a4, sp, 40 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 32 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 24 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v16, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v24, v24, v16 -; 
RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v24, a3 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB47_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB47_2: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v8, v8, a3 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 2 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 16 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsrl.vx v24, v16, a2 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vnot.v v16, v16 -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsub.vv v16, v16, v24 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v24, v16, a4 -; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: slli a6, a5, 32 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: vmul.vx v16, v16, a6 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v16, 
v16, a7 -; RV64-NEXT: bltu a0, a1, .LBB47_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB47_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vx v24, v8, a2 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsub.vv v8, v8, v24 -; RV64-NEXT: vand.vx v24, v8, a4 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v24 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vmul.vx v8, v8, a6 -; RV64-NEXT: vsrl.vx v8, v8, a7 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv1i8( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t -; CHECK-NEXT: ret - %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv1i8_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv2i8( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; 
CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t -; CHECK-NEXT: ret - %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv2i8_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv4i8( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t -; CHECK-NEXT: ret - %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv4i8_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.ctlz.nxv4i8( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv8i8( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t -; CHECK-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv8i8_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv16i8( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t -; CHECK-NEXT: ret - %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv16i8_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: 
vand.vx v10, v10, a0 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v10, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv32i8( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v12, v0.t -; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v12, v0.t -; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v12, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v12, v12, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v12, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t -; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t -; CHECK-NEXT: ret - %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv32i8_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i8_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsrl.vi v12, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v12 -; CHECK-NEXT: vsrl.vi v12, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v12 -; CHECK-NEXT: vsrl.vi v12, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v12 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v12, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v12, v12, a0 -; CHECK-NEXT: vsub.vv v8, v8, v12 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v12, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v12, v8 -; CHECK-NEXT: vsrl.vi v12, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v12 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv64i8( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv64i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t -; CHECK-NEXT: ret - %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, %m, i32 %evl) - ret %v -} - -define 
@vp_ctlz_zero_undef_nxv64i8_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ctlz_zero_undef_nxv64i8_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsrl.vi v16, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsrl.vi v16, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsrl.vi v16, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v16, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v16, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v16, v8 -; CHECK-NEXT: vsrl.vi v16, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: -; RV32: # %bb.0: -; 
RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: 
vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, 
v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; 
RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 
4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv 
v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, 
v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 
-; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, 
e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: 
vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; 
RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; 
RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 
2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 
819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv1i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; 
RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv64i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: li a0, 51 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv1i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; 
RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - 
%m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv64i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: li a0, 51 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv2i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; 
RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv1i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv2i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, 
m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +define @vp_ctlz_zero_undef_nxv1i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv2i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call 
@llvm.vp.ctlz.nxv2i16( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv2i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv4i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t -; 
RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv4i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: 
vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +define @vp_ctlz_zero_undef_nxv4i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv7i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, 
v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv7i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv8i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, 
zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv7i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: 
vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_ctlz_zero_undef_nxv8i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv8i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64: +define @vp_ctlz_zero_undef_nxv16i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv16i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -6892,47 +1619,31 @@ ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; 
RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: lui a0, 5 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vand.vx v16, v16, a0, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: lui a0, 3 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vand.vx v16, v8, a0, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vand.vx v8, v8, a0, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vand.vx v8, v8, a0, v0.t +; RV32-NEXT: li a0, 257 +; RV32-NEXT: vmul.vx v8, v8, a0, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64: +; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -6941,68 +1652,35 @@ ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: lui a0, 5 ; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: lui a0, 3 ; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: lui a0, 1 ; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: li a0, 257 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 true, %m, i32 %evl) - ret %v + 
%v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_ctlz_zero_undef_nxv8i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked: +define @vp_ctlz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 2 @@ -7011,47 +1689,31 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: lui a0, 5 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vand.vx v16, v16, a0 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: lui a0, 3 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vand.vx v16, v8, a0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: li a0, 257 +; RV32-NEXT: vmul.vx v8, v8, a0 +; RV32-NEXT: vsrl.vi v8, v8, 8 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked: +; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 2 @@ -7060,42 +1722,371 @@ ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 8 ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: 
vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: lui a0, 5 ; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: lui a0, 3 ; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: lui a0, 1 ; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: li a0, 257 ; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 +; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv1i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv1i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v9, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv2i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v10, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv4i32( %va, %m, i32 zeroext %evl) { 
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv4i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v12, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv8i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v16, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: li a0, 158 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv16i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsrl.vi v8, v8, 23 +; CHECK-NEXT: li a0, 158 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: 
vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv1i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv2i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv2i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv4i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv4i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv7i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv7i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv7i64_unmasked( %va, i32 zeroext 
%evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv8i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv8i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 true, %m, i32 %evl) @@ -7103,571 +2094,68 @@ } define @vp_ctlz_zero_undef_nxv16i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a2, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 44(sp) -; RV32-NEXT: sw a2, 40(sp) -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 36(sp) -; RV32-NEXT: sw a2, 32(sp) -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 28(sp) -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; 
RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: addi a4, sp, 40 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: addi a4, sp, 32 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 24 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, 
e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v24, v16, v0.t -; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB94_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB94_2: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a2, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, 
v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsub.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v8, v16, a4, v0.t -; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: slli a6, a5, 32 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vand.vx v8, v8, a5, v0.t -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: vmul.vx v8, v8, a6, v0.t -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t -; RV64-NEXT: addi t0, sp, 16 -; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; RV64-NEXT: bltu a0, a1, .LBB94_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB94_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a4, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t -; RV64-NEXT: vmul.vx v8, v8, a6, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: li a2, 52 +; 
CHECK-NEXT: vsrl.vx v16, v16, a2, v0.t +; CHECK-NEXT: li a3, 1086 +; CHECK-NEXT: vrsub.vx v16, v16, a3, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB94_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB94_2: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t +; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 true, %m, i32 %evl) ret %v } define @vp_ctlz_zero_undef_nxv16i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 16 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v24, v16, a2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a4, sp, 40 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 32 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 24 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v16, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: 
slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v24, v24, v16 -; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v24, a3 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB95_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB95_2: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v8, v8, a3 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 2 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 16 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsrl.vx v24, v16, a2 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vnot.v v16, v16 -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsub.vv v16, v16, v24 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v24, v16, a4 -; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: slli a6, a5, 32 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: lui a6, 
4112 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: vmul.vx v16, v16, a6 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v16, v16, a7 -; RV64-NEXT: bltu a0, a1, .LBB95_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB95_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vx v24, v8, a2 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsub.vv v8, v8, v24 -; RV64-NEXT: vand.vx v24, v8, a4 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v24 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vmul.vx v8, v8, a6 -; RV64-NEXT: vsrl.vx v8, v8, a7 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16 +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: li a2, 52 +; CHECK-NEXT: vsrl.vx v16, v16, a2 +; CHECK-NEXT: li a3, 1086 +; CHECK-NEXT: vrsub.vx v16, v16, a3 +; CHECK-NEXT: bltu a0, a1, .LBB95_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB95_2: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsrl.vx v8, v8, a2 +; CHECK-NEXT: vrsub.vx v8, v8, a3 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 true, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -3258,23 +3258,20 @@ define @vp_cttz_zero_undef_nxv1i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: 
vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.cttz.nxv1i8( %va, i1 true, %m, i32 %evl) ret %v @@ -3283,23 +3280,17 @@ define @vp_cttz_zero_undef_nxv1i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1 -; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vand.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -3311,23 +3302,20 @@ define @vp_cttz_zero_undef_nxv2i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.cttz.nxv2i8( %va, i1 true, %m, i32 %evl) ret %v @@ -3336,23 +3324,17 @@ define @vp_cttz_zero_undef_nxv2i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1 -; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vand.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -3364,23 +3346,20 @@ define @vp_cttz_zero_undef_nxv4i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.cttz.nxv4i8( %va, i1 true, %m, i32 %evl) ret %v @@ -3389,23 +3368,17 @@ define @vp_cttz_zero_undef_nxv4i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1 -; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vand.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -3417,23 +3390,20 @@ define @vp_cttz_zero_undef_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: 
vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.cttz.nxv8i8( %va, i1 true, %m, i32 %evl) ret %v @@ -3442,23 +3412,17 @@ define @vp_cttz_zero_undef_nxv8i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1 -; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vand.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v10, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -3470,23 +3434,20 @@ define @vp_cttz_zero_undef_nxv16i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.cttz.nxv16i8( %va, i1 true, %m, i32 %evl) ret %v @@ -3495,23 +3456,17 @@ define @vp_cttz_zero_undef_nxv16i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1 -; CHECK-NEXT: 
vnot.v v8, v8 +; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vand.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v10, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v12, a0 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -3627,121 +3582,34 @@ define @vp_cttz_zero_undef_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret %v = call @llvm.vp.cttz.nxv1i16( %va, i1 true, %m, i32 %evl) ret %v } define @vp_cttz_zero_undef_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked: -; RV32: # %bb.0: -; 
RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv1i16( %va, i1 true, %m, i32 %evl) @@ -3750,26 +3618,170 @@ define @vp_cttz_zero_undef_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i16: +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv2i16( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv2i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv2i16( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_cttz_zero_undef_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv4i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_cttz_zero_undef_nxv8i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv8i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_cttz_zero_undef_nxv16i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv16i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v12 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, 
%m, i32 %evl) + ret %v +} + + +define @vp_cttz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { +; RV32-LABEL: vp_cttz_zero_undef_nxv32i16: ; RV32: # %bb.0: ; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t +; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a0, 5 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t +; RV32-NEXT: vand.vx v16, v16, a0, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: lui a0, 3 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t +; RV32-NEXT: vand.vx v16, v8, a0, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: lui a0, 1 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v8, v8, a0, v0.t @@ -3778,26 +3790,26 @@ ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i16: +; RV64-LABEL: vp_cttz_zero_undef_nxv32i16: ; RV64: # %bb.0: ; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t +; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: vand.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: lui a0, 5 ; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vsub.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 3 ; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 1 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t @@ -3805,31 +3817,31 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv2i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked: +define @vp_cttz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { +; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: ; RV32: # %bb.0: ; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 +; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a0, 5 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: 
vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a0, 3 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a0 ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: lui a0, 1 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v8, v8, a0 @@ -3838,26 +3850,26 @@ ; RV32-NEXT: vsrl.vi v8, v8, 8 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked: +; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 +; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 1 ; RV64-NEXT: lui a0, 5 ; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 +; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vsub.vv v8, v8, v16 ; RV64-NEXT: lui a0, 3 ; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vadd.vv v8, v8, v16 ; RV64-NEXT: lui a0, 1 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 @@ -3865,2615 +3877,491 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i16( %va, i1 true, %m, i32 %evl) - ret %v + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw 
a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv1i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv1i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v9, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement 
poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_cttz_zero_undef_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv2i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v10, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_cttz_zero_undef_nxv4i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv4i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v12, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: 
vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; 
RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv8i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v12 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v16, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; 
RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv16i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsrl.vi v8, v8, 23 +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, 
v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv1i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; 
CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv2i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv2i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; 
RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv2i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv2i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, 
a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv4i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv4i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv4i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0 +; CHECK-NEXT: vand.vv 
v8, v8, v12 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv4i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv7i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv7i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 
-; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv7i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv7i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, 
v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) - ret %v +define @vp_cttz_zero_undef_nxv8i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret + %v = call @llvm.vp.cttz.nxv8i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv8i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: 
li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv8i64( %va, i1 true, %m, i32 %evl) + ret %v } - -define @vp_cttz_zero_undef_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_cttz_zero_undef_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li 
a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_cttz_zero_undef_nxv1i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw 
a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_cttz_zero_undef_nxv1i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, 
a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_cttz_zero_undef_nxv2i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t 
-; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv2i64( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_cttz_zero_undef_nxv2i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.cttz.nxv2i64( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_cttz_zero_undef_nxv4i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv4i64( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_cttz_zero_undef_nxv4i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked: 
-; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i64( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_cttz_zero_undef_nxv7i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv7i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 
819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv7i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call @llvm.vp.cttz.nxv7i64( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_cttz_zero_undef_nxv7i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; 
RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %v = call @llvm.vp.cttz.nxv7i64( %va, i1 true, %m, i32 %evl)
- ret %v
-}
-
-
-define @vp_cttz_zero_undef_nxv8i64( %va, %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
- %v = call @llvm.vp.cttz.nxv8i64( %va, i1 true, %m, i32 %evl)
- ret %v
-}
-
-define @vp_cttz_zero_undef_nxv8i64_unmasked( %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %v = call @llvm.vp.cttz.nxv8i64( %va, i1 true, %m, i32 %evl)
- ret %v
-}
-
-define @vp_cttz_zero_undef_nxv16i64( %va, %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a2, a1, 3
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 36(sp)
-; RV32-NEXT: sw a2, 32(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 28(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: lui a2, 4112
-; RV32-NEXT: addi a2, a2, 257
-; RV32-NEXT: sw a2, 20(sp)
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: li a2, 1
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v16, a2, v0.t
-; RV32-NEXT: vnot.v v16, v16, v0.t
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vadd.vv v24, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v16, v24, v16, v0.t
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB94_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB94_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: vsub.vx v16, v8, a2, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a2, a1, 3
-; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a2
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a3, a3, a2
-; RV64-NEXT: li a2, 1
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v8, v16, a2, v0.t
-; RV64-NEXT: vnot.v v16, v16, v0.t
-; RV64-NEXT: vand.vv v16, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsub.vv v16, v16, v8, v0.t
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v8, v16, a4, v0.t
-; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi t0, sp, 16
-; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
-; RV64-NEXT: bltu a0, a1, .LBB94_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB94_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsub.vx v16, v8, a2, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a4, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
- %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, %m, i32 %evl)
- ret %v
-}
+define @vp_cttz_zero_undef_nxv16i64( %va, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v16, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v16, v8, v0.t
+; CHECK-NEXT: fsrmi a2, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT: li a3, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a0, a1, .LBB94_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB94_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, %m, i32 %evl)
+ ret %v
+}
 define @vp_cttz_zero_undef_nxv16i64_unmasked( %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: li a2, 1
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v16, a2
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v0
-; RV32-NEXT: vsub.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v16, v0
-; RV32-NEXT: vsrl.vi v16, v16, 2
-; RV32-NEXT: vand.vv v16, v16, v0
-; RV32-NEXT: vadd.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v16, v24
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v24, v24, v16
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v24, a3
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB95_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB95_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v8, a2
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsrl.vx v8, v8, a3
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a3, a3, a2
-; RV64-NEXT: li a2, 1
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v24, v16, a2
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vand.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v16, v16, v24
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v24, v16, a4
-; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a4
-; RV64-NEXT: vadd.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v16, v16, a5
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v16, v16, a6
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v16, v16, a7
-; RV64-NEXT: bltu a0, a1, .LBB95_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB95_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v24, v8, a2
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v8, v8, v24
-; RV64-NEXT: vand.vx v24, v8, a4
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a4
-; RV64-NEXT: vadd.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v24
-; RV64-NEXT: vand.vx v8, v8, a5
-; RV64-NEXT: vmul.vx v8, v8, a6
-; RV64-NEXT: vsrl.vx v8, v8, a7
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v24, v16, 0
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: fsrmi a2, 1
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2
+; CHECK-NEXT: li a3, 1023
+; CHECK-NEXT: vsub.vx v16, v16, a3
+; CHECK-NEXT: bltu a0, a1, .LBB95_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB95_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v24, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v24
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vx v8, v8, a2
+; CHECK-NEXT: vsub.vx v8, v8, a3
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
 %head = insertelement poison, i1 true, i32 0
 %m = shufflevector %head, poison, zeroinitializer
 %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, %m, i32 %evl)