diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -681,8 +681,9 @@
       // of f32.
       EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
       if (isTypeLegal(FloatVT)) {
-        setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
-                           Custom);
+        setOperationAction(
+            {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+            Custom);
       }
     }
@@ -912,8 +913,9 @@
       // range of f32.
       EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
       if (isTypeLegal(FloatVT))
-        setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
-                           Custom);
+        setOperationAction(
+            {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+            Custom);
     }

     for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -3599,7 +3601,13 @@
   // For leading zeros, we need to remove the bias and convert from log2 to
   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
   unsigned Adjust = ExponentBias + (EltSize - 1);
-  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+  SDValue Res =
+      DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+  // For a zero input, the result above equals Adjust, which is greater than
+  // EltSize. Hence, we can use min(Res, EltSize) to get the correct CTLZ result.
+  if (Op.getOpcode() == ISD::CTLZ)
+    Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
+  return Res;
 }

 // While RVV has alignment restrictions, we should always be able to load as a
@@ -4218,6 +4226,7 @@
   case ISD::ABS:
   case ISD::VP_ABS:
     return lowerABS(Op, DAG);
+  case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTTZ_ZERO_UNDEF:
     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
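For intuition, the following is a minimal scalar sketch (an illustration written for this review, not code from the patch) of what the new ISD::CTLZ path computes: recover floor(log2(x)) from the biased f32 exponent field, subtract it from Adjust = ExponentBias + (EltSize - 1), and clamp with an unsigned min so that a zero input (whose exponent field is 0) produces EltSize instead of a poison value. The constants mirror the i16-via-f32 CHECK lines in the tests below; ctlz16ViaF32 is a hypothetical name.

// Scalar model of the vector lowering (illustration only).
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t ctlz16ViaF32(uint16_t X) {
  float F = static_cast<float>(X); // exact: every uint16_t fits in f32's mantissa
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t Exp = Bits >> 23;        // biased exponent, like vnsrl.wi ..., 23
  uint32_t Adjust = 127 + (16 - 1); // ExponentBias + (EltSize - 1) == 142
  uint32_t Res = Adjust - Exp;      // like vrsub.vx with 142
  return Res < 16 ? Res : 16;       // like vminu.vx with 16; maps X == 0 to 16
}

int main() {
  assert(ctlz16ViaF32(0) == 16);    // zero input is defined: no vmseq/vmerge needed
  assert(ctlz16ViaF32(1) == 15);
  assert(ctlz16ViaF32(0x00FF) == 8);
  assert(ctlz16ViaF32(0x8000) == 0);
  return 0;
}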
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -35,28 +35,28 @@
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-F-NEXT: vzext.vf2 v9, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-F-NEXT: vnsrl.wi v8, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v8, 0
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vrsub.vx v8, v9, a0
-; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
+; CHECK-F-NEXT: li a0, 8
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv1i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-D-NEXT: vzext.vf2 v9, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vrsub.vx v8, v9, a0
-; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 8
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
ret <vscale x 1 x i8> %a
@@ -92,28 +92,28 @@
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-F-NEXT: vzext.vf2 v9, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-F-NEXT: vnsrl.wi v8, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v8, 0
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vrsub.vx v8, v9, a0
-; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
+; CHECK-F-NEXT: li a0, 8
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv2i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-D-NEXT: vzext.vf2 v9, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vrsub.vx v8, v9, a0
-; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 8
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
ret <vscale x 2 x i8> %a
@@ -150,13 +150,13 @@
; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-F-NEXT: vzext.vf2 v9, v8
; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v10, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v8, 0
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vrsub.vx v8, v9, a0
-; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
+; CHECK-F-NEXT: li a0, 8
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv4i8:
@@ -164,13 +164,13 @@
; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-D-NEXT: vzext.vf2 v9, v8
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vrsub.vx v8, v9, a0
-; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 8
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
ret <vscale x 4 x i8> %a
@@ -207,13 +207,13 @@
; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-F-NEXT: vzext.vf2 v10, v8
; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-F-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v12, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v9, v10, 0
+; CHECK-F-NEXT: vnsrl.wi v10, v8, 0
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vrsub.vx v8, v9, a0
-; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: vrsub.vx v8, v10, a0
+; CHECK-F-NEXT: li a0, 8
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv8i8:
@@ -221,13 +221,13 @@
; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-D-NEXT: vzext.vf2 v10, v8
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-D-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v10, 0
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vrsub.vx v8, v9, a0
-; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: vrsub.vx v8, v10, a0
+; CHECK-D-NEXT: li a0, 8
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
ret <vscale x 8 x i8> %a
@@ -264,13 +264,13 @@
; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-F-NEXT: vzext.vf2 v12, v8
; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-F-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v16, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v10, v12, 0
+; CHECK-F-NEXT: vnsrl.wi v12, v8, 0
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vrsub.vx v8, v10, a0
-; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-F-NEXT: vrsub.vx v8, v12, a0
+; CHECK-F-NEXT: li a0, 8
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv16i8:
@@ -278,13 +278,13 @@
; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-D-NEXT: vzext.vf2 v12, v8
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-D-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v12, 0
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vrsub.vx v8, v10, a0
-; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: vrsub.vx v8, v12, a0
+; CHECK-D-NEXT: li a0, 8
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
ret <vscale x 16 x i8> %a
@@ -420,24 +420,22 @@
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8
-; CHECK-F-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v9, 23
; CHECK-F-NEXT: li a0, 142
-; CHECK-F-NEXT: vrsub.vx v9, v9, a0
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 16
-; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv1i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
-; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
; CHECK-D-NEXT: li a0, 142
-; CHECK-D-NEXT: vrsub.vx v9, v9, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 16
-; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
ret <vscale x 1 x i16> %a
@@ -515,24 +513,22 @@
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8
-; CHECK-F-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v9, 23
; CHECK-F-NEXT: li a0, 142
-; CHECK-F-NEXT: vrsub.vx v9, v9, a0
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 16
-; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv2i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
-; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
; CHECK-D-NEXT: li a0, 142
-; CHECK-D-NEXT: vrsub.vx v9, v9, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 16
-; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
ret <vscale x 2 x i16> %a
@@ -610,24 +606,22 @@
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v10, 23
; CHECK-F-NEXT: li a0, 142
-; CHECK-F-NEXT: vrsub.vx v9, v9, a0
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 16
-; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv4i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
; CHECK-D-NEXT: li a0, 142
-; CHECK-D-NEXT: vrsub.vx v9, v9, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 16
-; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
ret <vscale x 4 x i16> %a
@@ -705,24 +699,22 @@
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-F-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v12, 23
; CHECK-F-NEXT: li a0, 142
-; CHECK-F-NEXT: vrsub.vx v10, v10, a0
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 16
-; CHECK-F-NEXT: vmerge.vxm v8, v10, a0, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv8i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-D-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
; CHECK-D-NEXT: li a0, 142
-; CHECK-D-NEXT: vrsub.vx v10, v10, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 16
-; CHECK-D-NEXT: vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
ret <vscale x 8 x i16> %a
@@ -800,24 +792,22 @@
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-F-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-F-NEXT: vnsrl.wi v8, v16, 23
; CHECK-F-NEXT: li a0, 142
-; CHECK-F-NEXT: vrsub.vx v12, v12, a0
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 16
-; CHECK-F-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv16i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-D-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
; CHECK-D-NEXT: li a0, 142
-; CHECK-D-NEXT: vrsub.vx v12, v12, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 16
-; CHECK-D-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
ret <vscale x 16 x i16> %a
@@ -973,13 +963,12 @@
; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfcvt.f.xu.v v9, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v9, v9, 23
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v8, 23
; CHECK-F-NEXT: li a1, 158
-; CHECK-F-NEXT: vrsub.vx v9, v9, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a1
; CHECK-F-NEXT: li a1, 32
-; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -989,14 +978,13 @@
; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-D-NEXT: vsrl.vx v9, v9, a0
+; CHECK-D-NEXT: vsrl.vx v8, v9, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v9, v9, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 32
-; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
ret <vscale x 1 x i32> %a
@@ -1081,13 +1069,12 @@
; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfcvt.f.xu.v v9, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v9, v9, 23
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v8, 23
; CHECK-F-NEXT: li a1, 158
-; CHECK-F-NEXT: vrsub.vx v9, v9, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a1
; CHECK-F-NEXT: li a1, 32
-; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1097,14 +1084,13 @@
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-D-NEXT: vsrl.vx v10, v10, a0
+; CHECK-D-NEXT: vsrl.vx v8, v10, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v10, 0
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v9, v9, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v10, a0
; CHECK-D-NEXT: li a0, 32
-; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
ret <vscale x 2 x i32> %a
@@ -1189,13 +1175,12 @@
; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfcvt.f.xu.v v10, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v10, v10, 23
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v8, 23
; CHECK-F-NEXT: li a1, 158
-; CHECK-F-NEXT: vrsub.vx v10, v10, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a1
; CHECK-F-NEXT: li a1, 32
-; CHECK-F-NEXT: vmerge.vxm v8, v10, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1205,14 +1190,13 @@
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-D-NEXT: vsrl.vx v12, v12, a0
+; CHECK-D-NEXT: vsrl.vx v8, v12, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v12, 0
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v10, v10, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v12, a0
; CHECK-D-NEXT: li a0, 32
-; CHECK-D-NEXT: vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
ret <vscale x 4 x i32> %a
@@ -1297,13 +1281,12 @@
; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfcvt.f.xu.v v12, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v12, v12, 23
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v8, 23
; CHECK-F-NEXT: li a1, 158
-; CHECK-F-NEXT: vrsub.vx v12, v12, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a1
; CHECK-F-NEXT: li a1, 32
-; CHECK-F-NEXT: vmerge.vxm v8, v12, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1313,14 +1296,13 @@
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-D-NEXT: vsrl.vx v16, v16, a0
+; CHECK-D-NEXT: vsrl.vx v8, v16, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v16, 0
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v12, v12, a0
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v16, a0
; CHECK-D-NEXT: li a0, 32
-; CHECK-D-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
%a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
ret <vscale x 8 x i32> %a
@@ -1405,13 +1387,12 @@
; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfcvt.f.xu.v v16, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v16, v16, 23
+; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-F-NEXT: vsrl.vi v8, v8, 23
; CHECK-F-NEXT: li a1, 158
-; CHECK-F-NEXT: vrsub.vx v16, v16, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a1
; CHECK-F-NEXT: li a1, 32
-; CHECK-F-NEXT: vmerge.vxm v8, v16, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1420,13 +1401,12 @@
; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-D-NEXT: vmset.m v0
; CHECK-D-NEXT: fsrmi a0, 1
-; CHECK-D-NEXT: vfcvt.f.xu.v v16, v8, v0.t
-; CHECK-D-NEXT: vsrl.vi v16, v16, 23
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-D-NEXT: vsrl.vi v8, v8, 23
; CHECK-D-NEXT: li a1, 158
-; CHECK-D-NEXT: vrsub.vx v16, v16, a1
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 32
-; CHECK-D-NEXT: vmerge.vxm v8, v16, a1, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a1
; CHECK-D-NEXT: fsrm a0
; CHECK-D-NEXT: ret
%a = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 false)
@@ -1537,14 +1517,13 @@
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v9, v9, 23
+; CHECK-F-NEXT: vsrl.vi v8, v9, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v10, v9
+; CHECK-F-NEXT: vzext.vf2 v9, v8
; CHECK-F-NEXT: li a1, 190
-; CHECK-F-NEXT: vrsub.vx v9, v10, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v9, a1
; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1553,14 +1532,13 @@
; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-D-NEXT: vmset.m v0
; CHECK-D-NEXT: fsrmi a0, 1
-; CHECK-D-NEXT: vfcvt.f.xu.v v9, v8, v0.t
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; CHECK-D-NEXT: li a1, 52
-; CHECK-D-NEXT: vsrl.vx v9, v9, a1
+; CHECK-D-NEXT: vsrl.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 1086
-; CHECK-D-NEXT: vrsub.vx v9, v9, a1
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 64
-; CHECK-D-NEXT: vmerge.vxm v8, v9, a1, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a1
; CHECK-D-NEXT: fsrm a0
; CHECK-D-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 false)
@@ -1671,14 +1649,13 @@
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v10, v10, 23
+; CHECK-F-NEXT: vsrl.vi v8, v10, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v12, v10
+; CHECK-F-NEXT: vzext.vf2 v10, v8
; CHECK-F-NEXT: li a1, 190
-; CHECK-F-NEXT: vrsub.vx v10, v12, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v10, a1
; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vmerge.vxm v8, v10, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1687,14 +1664,13 @@
; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-D-NEXT: vmset.m v0
; CHECK-D-NEXT: fsrmi a0, 1
-; CHECK-D-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; CHECK-D-NEXT: li a1, 52
-; CHECK-D-NEXT: vsrl.vx v10, v10, a1
+; CHECK-D-NEXT: vsrl.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 1086
-; CHECK-D-NEXT: vrsub.vx v10, v10, a1
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 64
-; CHECK-D-NEXT: vmerge.vxm v8, v10, a1, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a1
; CHECK-D-NEXT: fsrm a0
; CHECK-D-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 false)
@@ -1805,14 +1781,13 @@
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v12, v12, 23
+; CHECK-F-NEXT: vsrl.vi v8, v12, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v16, v12
+; CHECK-F-NEXT: vzext.vf2 v12, v8
; CHECK-F-NEXT: li a1, 190
-; CHECK-F-NEXT: vrsub.vx v12, v16, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v12, a1
; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vmerge.vxm v8, v12, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1821,14 +1796,13 @@
; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-D-NEXT: vmset.m v0
; CHECK-D-NEXT: fsrmi a0, 1
-; CHECK-D-NEXT: vfcvt.f.xu.v v12, v8, v0.t
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; CHECK-D-NEXT: li a1, 52
-; CHECK-D-NEXT: vsrl.vx v12, v12, a1
+; CHECK-D-NEXT: vsrl.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 1086
-; CHECK-D-NEXT: vrsub.vx v12, v12, a1
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 64
-; CHECK-D-NEXT: vmerge.vxm v8, v12, a1, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a1
; CHECK-D-NEXT: fsrm a0
; CHECK-D-NEXT: ret
%a = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 false)
@@ -1939,14 +1913,13 @@
; CHECK-F-NEXT: vmset.m v0
; CHECK-F-NEXT: fsrmi a0, 1
; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8, v0.t
-; CHECK-F-NEXT: vsrl.vi v16, v16, 23
+; CHECK-F-NEXT: vsrl.vi v8, v16, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v24, v16
+; CHECK-F-NEXT: vzext.vf2 v16, v8
; CHECK-F-NEXT: li a1, 190
-; CHECK-F-NEXT: vrsub.vx v16, v24, a1
-; CHECK-F-NEXT: vmseq.vi v0, v8, 0
+; CHECK-F-NEXT: vrsub.vx v8, v16, a1
; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vmerge.vxm v8, v16, a1, v0
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1955,14 +1928,13 @@
; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-D-NEXT: vmset.m v0
; CHECK-D-NEXT: fsrmi a0, 1
-; CHECK-D-NEXT: vfcvt.f.xu.v v16, v8, v0.t
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; CHECK-D-NEXT: li a1, 52
-; CHECK-D-NEXT: vsrl.vx v16, v16, a1
+; CHECK-D-NEXT: vsrl.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 1086
-; CHECK-D-NEXT: vrsub.vx v16, v16, a1
-; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a1
; CHECK-D-NEXT: li a1, 64
-; CHECK-D-NEXT: vmerge.vxm v8, v16, a1, v0
+; CHECK-D-NEXT: vminu.vx v8, v8, a1
; CHECK-D-NEXT: fsrm a0
; CHECK-D-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 false)
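For reference, every magic constant in the CHECK lines above and below is an instance of Adjust = ExponentBias + (EltSize - 1) from the C++ change, and the clamp operand is EltSize itself:

  134 = 127 + (8 - 1), clamp 8 (i8 elements via f32)
  142 = 127 + (16 - 1), clamp 16 (i16 elements via f32)
  158 = 127 + (32 - 1), clamp 32 (i32 elements via f32)
  1054 = 1023 + (32 - 1), clamp 32 (i32 elements via f64)
  190 = 127 + (64 - 1), clamp 64 (i64 elements via the narrowing f32 convert)
  1086 = 1023 + (64 - 1), clamp 64 (i64 elements via f64)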
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -9,8 +9,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8

define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: ctlz_v16i8:
@@ -45,13 +45,13 @@
; LMULMAX8-NEXT: vle8.v v8, (a0)
; LMULMAX8-NEXT: vzext.vf2 v10, v8
; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23
+; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23
; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX8-NEXT: vnsrl.wi v9, v10, 0
+; LMULMAX8-NEXT: vnsrl.wi v10, v8, 0
; LMULMAX8-NEXT: li a1, 134
-; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT: vrsub.vx v8, v9, a1
-; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0
+; LMULMAX8-NEXT: vrsub.vx v8, v10, a1
+; LMULMAX8-NEXT: li a1, 8
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
%a = load <16 x i8>, ptr %x
@@ -208,12 +208,11 @@
; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0)
; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV32F-NEXT: vnsrl.wi v8, v10, 23
; LMULMAX2-RV32F-NEXT: li a1, 142
-; LMULMAX2-RV32F-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32F-NEXT: li a1, 16
-; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV32F-NEXT: ret
;
@@ -222,12 +221,11 @@
; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0)
; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64F-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV64F-NEXT: vnsrl.wi v8, v10, 23
; LMULMAX2-RV64F-NEXT: li a1, 142
-; LMULMAX2-RV64F-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64F-NEXT: li a1, 16
-; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV64F-NEXT: ret
;
@@ -236,12 +234,11 @@
; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX2-RV32D-NEXT: vle16.v v8, (a0)
; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32D-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV32D-NEXT: vnsrl.wi v8, v10, 23
; LMULMAX2-RV32D-NEXT: li a1, 142
-; LMULMAX2-RV32D-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: li a1, 16
-; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV32D-NEXT: ret
;
@@ -250,12 +247,11 @@
; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX2-RV64D-NEXT: vle16.v v8, (a0)
; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64D-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX2-RV64D-NEXT: vnsrl.wi v8, v10, 23
; LMULMAX2-RV64D-NEXT: li a1, 142
-; LMULMAX2-RV64D-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: li a1, 16
-; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV64D-NEXT: ret
;
@@ -264,12 +260,11 @@
; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX8-NEXT: vle16.v v8, (a0)
; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8
-; LMULMAX8-NEXT: vnsrl.wi v9, v10, 23
+; LMULMAX8-NEXT: vnsrl.wi v8, v10, 23
; LMULMAX8-NEXT: li a1, 142
-; LMULMAX8-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: li a1, 16
-; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
%a = load <8 x i16>, ptr %x
@@ -363,14 +358,13 @@
; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV32F-NEXT: vmset.m v0
; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v9, v8, v0.t
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV32F-NEXT: fsrm a1
-; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23
; LMULMAX2-RV32F-NEXT: li a1, 158
-; LMULMAX2-RV32F-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32F-NEXT: li a1, 32
-; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32F-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV32F-NEXT: ret
;
@@ -380,14 +374,13 @@
; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV64F-NEXT: vmset.m v0
; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v9, v8, v0.t
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV64F-NEXT: fsrm a1
-; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23
; LMULMAX2-RV64F-NEXT: li a1, 158
-; LMULMAX2-RV64F-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64F-NEXT: li a1, 32
-; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64F-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV64F-NEXT: ret
;
@@ -397,12 +390,11 @@
; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8
; LMULMAX2-RV32D-NEXT: li a1, 52
-; LMULMAX2-RV32D-NEXT: vnsrl.wx v9, v10, a1
+; LMULMAX2-RV32D-NEXT: vnsrl.wx v8, v10, a1
; LMULMAX2-RV32D-NEXT: li a1, 1054
-; LMULMAX2-RV32D-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: li a1, 32
-; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV32D-NEXT: ret
;
@@ -412,12 +404,11 @@
; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8
; LMULMAX2-RV64D-NEXT: li a1, 52
-; LMULMAX2-RV64D-NEXT: vnsrl.wx v9, v10, a1
+; LMULMAX2-RV64D-NEXT: vnsrl.wx v8, v10, a1
; LMULMAX2-RV64D-NEXT: li a1, 1054
-; LMULMAX2-RV64D-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: li a1, 32
-; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV64D-NEXT: ret
;
@@ -427,12 +418,11 @@
; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8-NEXT: vle32.v v8, (a0)
; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8
; LMULMAX8-NEXT: li a1, 52
-; LMULMAX8-NEXT: vnsrl.wx v9, v10, a1
+; LMULMAX8-NEXT: vnsrl.wx v8, v10, a1
; LMULMAX8-NEXT: li a1, 1054
-; LMULMAX8-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: li a1, 32
-; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
%a = load <4 x i32>, ptr %x
@@ -551,18 +541,15 @@
; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8, v0.t
; LMULMAX2-RV32F-NEXT: fsrm a1
-; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23
; LMULMAX2-RV32F-NEXT: li a1, 190
; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1
; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v9
-; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT: vmv.v.i v9, 0
-; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9
+; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8
; LMULMAX2-RV32F-NEXT: li a1, 64
-; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX2-RV32F-NEXT: vminu.vx v8, v9, a1
; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32F-NEXT: ret
;
@@ -574,14 +561,13 @@
; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8, v0.t
; LMULMAX2-RV64F-NEXT: fsrm a1
-; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23
+; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23
; LMULMAX2-RV64F-NEXT: li a1, 190
-; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV64F-NEXT: vwsubu.vv v11, v10, v9
-; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8
; LMULMAX2-RV64F-NEXT: li a1, 64
-; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v11, a1, v0
+; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; LMULMAX2-RV64F-NEXT: vminu.vx v8, v10, a1
; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64F-NEXT: ret
;
@@ -591,18 +577,14 @@
; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV32D-NEXT: vmset.m v0
; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v9, v8, v0.t
+; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV32D-NEXT: fsrm a1
; LMULMAX2-RV32D-NEXT: li a1, 52
-; LMULMAX2-RV32D-NEXT: vsrl.vx v9, v9, a1
+; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: li a1, 1086
-; LMULMAX2-RV32D-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0
-; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10
+; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: li a1, 64
-; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32D-NEXT: ret
;
@@ -612,56 +594,33 @@
; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV64D-NEXT: vmset.m v0
; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v9, v8, v0.t
+; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV64D-NEXT: fsrm a1
; LMULMAX2-RV64D-NEXT: li a1, 52
-; LMULMAX2-RV64D-NEXT: vsrl.vx v9, v9, a1
+; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: li a1, 1086
-; LMULMAX2-RV64D-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: li a1, 64
-; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0
+; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64D-NEXT: ret
;
-; LMULMAX8-RV32-LABEL: ctlz_v2i64:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: vmset.m v0
-; LMULMAX8-RV32-NEXT: fsrmi a1, 1
-; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v9, v8, v0.t
-; LMULMAX8-RV32-NEXT: fsrm a1
-; LMULMAX8-RV32-NEXT: li a1, 52
-; LMULMAX8-RV32-NEXT: vsrl.vx v9, v9, a1
-; LMULMAX8-RV32-NEXT: li a1, 1086
-; LMULMAX8-RV32-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0
-; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10
-; LMULMAX8-RV32-NEXT: li a1, 64
-; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v2i64:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV64-NEXT: vmset.m v0
-; LMULMAX8-RV64-NEXT: fsrmi a1, 1
-; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v9, v8, v0.t
-; LMULMAX8-RV64-NEXT: fsrm a1
-; LMULMAX8-RV64-NEXT: li a1, 52
-; LMULMAX8-RV64-NEXT: vsrl.vx v9, v9, a1
-; LMULMAX8-RV64-NEXT: li a1, 1086
-; LMULMAX8-RV64-NEXT: vrsub.vx v9, v9, a1
-; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT: li a1, 64
-; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX8-RV64-NEXT: ret
+; LMULMAX8-LABEL: ctlz_v2i64:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX8-NEXT: vle64.v v8, (a0)
+; LMULMAX8-NEXT: vmset.m v0
+; LMULMAX8-NEXT: fsrmi a1, 1
+; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; LMULMAX8-NEXT: fsrm a1
+; LMULMAX8-NEXT: li a1, 52
+; LMULMAX8-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-NEXT: li a1, 1086
+; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
+; LMULMAX8-NEXT: li a1, 64
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
+; LMULMAX8-NEXT: vse64.v v8, (a0)
+; LMULMAX8-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
@@ -751,13 +710,13 @@
; LMULMAX8-NEXT: vle8.v v8, (a0)
; LMULMAX8-NEXT: vzext.vf2 v12, v8
; LMULMAX8-NEXT: vfwcvt.f.xu.v v16, v12
-; LMULMAX8-NEXT: vnsrl.wi v12, v16, 23
+; LMULMAX8-NEXT: vnsrl.wi v8, v16, 23
; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX8-NEXT: vnsrl.wi v10, v12, 0
+; LMULMAX8-NEXT: vnsrl.wi v12, v8, 0
; LMULMAX8-NEXT: li a1, 134
-; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT: vrsub.vx v8, v10, a1
-; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0
+; LMULMAX8-NEXT: vrsub.vx v8, v12, a1
+; LMULMAX8-NEXT: li a1, 8
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
%a = load <32 x i8>, ptr %x
@@ -962,12 +921,11 @@
; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX8-NEXT: vle16.v v8, (a0)
; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8
-; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23
+; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23
; LMULMAX8-NEXT: li a1, 142
-; LMULMAX8-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: li a1, 16
-; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
%a = load <16 x i16>, ptr %x
@@ -1061,14 +1019,13 @@
; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV32F-NEXT: vmset.m v0
; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV32F-NEXT: fsrm a1
-; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23
; LMULMAX2-RV32F-NEXT: li a1, 158
-; LMULMAX2-RV32F-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32F-NEXT: li a1, 32
-; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32F-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV32F-NEXT: ret
;
@@ -1078,14 +1035,13 @@
; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV64F-NEXT: vmset.m v0
; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV64F-NEXT: fsrm a1
-; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23
; LMULMAX2-RV64F-NEXT: li a1, 158
-; LMULMAX2-RV64F-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64F-NEXT: li a1, 32
-; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64F-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV64F-NEXT: ret
;
@@ -1095,14 +1051,13 @@
; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV32D-NEXT: vmset.m v0
; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV32D-NEXT: fsrm a1
-; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32D-NEXT: vsrl.vi v8, v8, 23
; LMULMAX2-RV32D-NEXT: li a1, 158
-; LMULMAX2-RV32D-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: li a1, 32
-; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV32D-NEXT: ret
;
@@ -1112,14 +1067,13 @@
; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV64D-NEXT: vmset.m v0
; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV64D-NEXT: fsrm a1
-; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64D-NEXT: vsrl.vi v8, v8, 23
; LMULMAX2-RV64D-NEXT: li a1, 158
-; LMULMAX2-RV64D-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: li a1, 32
-; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV64D-NEXT: ret
;
@@ -1129,12 +1083,11 @@
; LMULMAX8-NEXT: vle32.v v8, (a0)
; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8
; LMULMAX8-NEXT: li a1, 52
-; LMULMAX8-NEXT: vnsrl.wx v10, v12, a1
+; LMULMAX8-NEXT: vnsrl.wx v8, v12, a1
; LMULMAX8-NEXT: li a1, 1054
-; LMULMAX8-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX8-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: li a1, 32
-; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
%a = load <8 x i32>, ptr %x
@@ -1253,18 +1206,15 @@
; LMULMAX2-RV32F-NEXT: fsrmi a1, 1
; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8, v0.t
; LMULMAX2-RV32F-NEXT: fsrm a1
-; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23
; LMULMAX2-RV32F-NEXT: li a1, 190
; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1
; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT: vwsubu.wv v12, v12, v10
-; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0
-; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10
+; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8
; LMULMAX2-RV32F-NEXT: li a1, 64
-; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v12, a1, v0
+; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV32F-NEXT: vminu.vx v8, v10, a1
; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32F-NEXT: ret
;
@@ -1276,14 +1226,13 @@
; LMULMAX2-RV64F-NEXT: fsrmi a1, 1
; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8, v0.t
; LMULMAX2-RV64F-NEXT: fsrm a1
-; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23
+; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23
; LMULMAX2-RV64F-NEXT: li a1, 190
-; LMULMAX2-RV64F-NEXT: vmv.v.x v11, a1
-; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v11, v10
-; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8
; LMULMAX2-RV64F-NEXT: li a1, 64
-; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v12, a1, v0
+; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; LMULMAX2-RV64F-NEXT: vminu.vx v8, v10, a1
; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64F-NEXT: ret
;
@@ -1293,18 +1242,14 @@
; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV32D-NEXT: vmset.m v0
; LMULMAX2-RV32D-NEXT: fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV32D-NEXT: fsrm a1
; LMULMAX2-RV32D-NEXT: li a1, 52
-; LMULMAX2-RV32D-NEXT: vsrl.vx v10, v10, a1
+; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: li a1, 1086
-; LMULMAX2-RV32D-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32D-NEXT: vmv.v.i v12, 0
-; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v12
+; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: li a1, 64
-; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32D-NEXT: ret
;
@@ -1314,56 +1259,33 @@
; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV64D-NEXT: vmset.m v0
; LMULMAX2-RV64D-NEXT: fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v8, v0.t
+; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; LMULMAX2-RV64D-NEXT: fsrm a1
; LMULMAX2-RV64D-NEXT: li a1, 52
-; LMULMAX2-RV64D-NEXT: vsrl.vx v10, v10, a1
+; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: li a1, 1086
-; LMULMAX2-RV64D-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0
+; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: li a1, 64
-; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0
+; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1
; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64D-NEXT: ret
;
-; LMULMAX8-RV32-LABEL: ctlz_v4i64:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: vmset.m v0
-; LMULMAX8-RV32-NEXT: fsrmi a1, 1
-; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v10, v8, v0.t
-; LMULMAX8-RV32-NEXT: fsrm a1
-; LMULMAX8-RV32-NEXT: li a1, 52
-; LMULMAX8-RV32-NEXT: vsrl.vx v10, v10, a1
-; LMULMAX8-RV32-NEXT: li a1, 1086
-; LMULMAX8-RV32-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v12, 0
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v12
-; LMULMAX8-RV32-NEXT: li a1, 64
-; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v4i64:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV64-NEXT: vmset.m v0
-; LMULMAX8-RV64-NEXT: fsrmi a1, 1
-; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v10, v8, v0.t
-; LMULMAX8-RV64-NEXT: fsrm a1
-; LMULMAX8-RV64-NEXT: li a1, 52
-; LMULMAX8-RV64-NEXT: vsrl.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT: li a1, 1086
-; LMULMAX8-RV64-NEXT: vrsub.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT: li a1, 64
-; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX8-RV64-NEXT: ret
+; LMULMAX8-LABEL: ctlz_v4i64:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX8-NEXT: vle64.v v8, (a0)
+; LMULMAX8-NEXT: vmset.m v0
+; LMULMAX8-NEXT: fsrmi a1, 1
+; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; LMULMAX8-NEXT: fsrm a1
+; LMULMAX8-NEXT: li a1, 52
+; LMULMAX8-NEXT: vsrl.vx v8, v8, a1
+; LMULMAX8-NEXT: li a1, 1086
+; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
+; LMULMAX8-NEXT: li a1, 64
+; LMULMAX8-NEXT: vminu.vx v8, v8, a1
+; LMULMAX8-NEXT: vse64.v v8, (a0)
+; LMULMAX8-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
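A closing observation on the rounding mode (a reviewer note, not part of the diff): the same-width conversions above first switch to round-towards-zero (fsrmi a1, 1 writes RTZ into frm; fsrm a1 restores it). Under the default round-to-nearest, an input just below a power of two, e.g. 2^32 - 1 for i32, would round up to 2^32, report a biased exponent of 159 instead of 158, and make Adjust - Exp wrap to UINT32_MAX, so the new vminu.vx would return 32 rather than the correct 0. Truncation keeps the exponent at floor(log2(x)) + Bias, so the vrsub.vx/vminu.vx arithmetic stays exact. The widening paths (i8/i16 via f32, i32 via f64) are exact conversions and need no frm change, which matches the absence of fsrmi in those blocks.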