diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3887,13 +3887,17 @@
   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
     return Fold;
 
+  bool N0ConstOrSplat =
+      isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+  bool N1ConstOrSplat =
+      isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+
   // Ensure that the constant occurs on the RHS and fold constant comparisons.
   // TODO: Handle non-splat vector constants. All undef causes trouble.
   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
   // infinite loop here when we encounter one.
   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
-  if (isConstOrConstSplat(N0) &&
-      (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
+  if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
       (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -3902,7 +3906,7 @@
   // -- but in reverse order -- then try to commute the operands of this setcc
   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
   // instruction on some targets.
-  if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+  if (!N0ConstOrSplat && !N1ConstOrSplat &&
       (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -50,38 +50,36 @@
 ; RV32D-LABEL: cttz_nxv1i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vmv.v.i v9, 0
-; RV32D-NEXT:    vmseq.vv v0, v9, v8
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV32D-NEXT:    vzext.vf4 v10, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
+; RV32D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
+; RV32D-NEXT:    vncvt.x.x.w v9, v9
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv1i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV64D-NEXT:    vzext.vf4 v10, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
+; RV64D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
@@ -135,38 +133,36 @@
 ; RV32D-LABEL: cttz_nxv2i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vmv.v.i v9, 0
-; RV32D-NEXT:    vmseq.vv v0, v9, v8
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV32D-NEXT:    vzext.vf4 v10, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
+; RV32D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
+; RV32D-NEXT:    vncvt.x.x.w v9, v9
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv2i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV64D-NEXT:    vzext.vf4 v10, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
+; RV64D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
@@ -220,38 +216,36 @@
 ; RV32D-LABEL: cttz_nxv4i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vmv.v.i v9, 0
-; RV32D-NEXT:    vmseq.vv v0, v9, v8
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v10
+; RV32D-NEXT:    vzext.vf4 v10, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v10, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 23
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v10
+; RV32D-NEXT:    vncvt.x.x.w v9, v9
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv4i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v10
+; RV64D-NEXT:    vzext.vf4 v10, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v10, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 23
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v10
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
@@ -305,38 +299,36 @@
 ; RV32D-LABEL: cttz_nxv8i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vmv.v.i v9, 0
-; RV32D-NEXT:    vmseq.vv v0, v9, v8
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vzext.vf4 v12, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v12
+; RV32D-NEXT:    vzext.vf4 v12, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v12, v12
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 23
+; RV32D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v12
+; RV32D-NEXT:    vncvt.x.x.w v9, v10
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv8i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vzext.vf4 v12, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v12
+; RV64D-NEXT:    vzext.vf4 v12, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v12, v12
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 23
+; RV64D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v12
+; RV64D-NEXT:    vncvt.x.x.w v9, v10
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
@@ -390,38 +382,36 @@
 ; RV32D-LABEL: cttz_nxv16i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vmv.v.i v10, 0
-; RV32D-NEXT:    vmseq.vv v0, v10, v8
 ; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v10
+; RV32D-NEXT:    vand.vv v10, v8, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV32D-NEXT:    vzext.vf4 v16, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v16
+; RV32D-NEXT:    vzext.vf4 v16, v10
+; RV32D-NEXT:    vfcvt.f.xu.v v16, v16
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 23
+; RV32D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v16
+; RV32D-NEXT:    vncvt.x.x.w v10, v12
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v10, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv16i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vmv.v.i v10, 0
-; RV64D-NEXT:    vmseq.vv v0, v10, v8
 ; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
+; RV64D-NEXT:    vand.vv v10, v8, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV64D-NEXT:    vzext.vf4 v16, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v16
+; RV64D-NEXT:    vzext.vf4 v16, v10
+; RV64D-NEXT:    vfcvt.f.xu.v v16, v16
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 23
+; RV64D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v16
+; RV64D-NEXT:    vncvt.x.x.w v10, v12
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v10, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
@@ -541,31 +531,29 @@
 ; RV32D-LABEL: cttz_nxv1i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vmv.v.i v9, 0
-; RV32D-NEXT:    vmseq.vv v0, v9, v8
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
+; RV32D-NEXT:    vand.vv v9, v8, v9
+; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v9, v9, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv1i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
   ret <vscale x 1 x i16> %a
@@ -632,31 +620,29 @@
 ; RV32D-LABEL: cttz_nxv2i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vmv.v.i v9, 0
-; RV32D-NEXT:    vmseq.vv v0, v9, v8
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
+; RV32D-NEXT:    vand.vv v9, v8, v9
+; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v9, v9, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv2i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
   ret <vscale x 2 x i16> %a
@@ -723,31 +709,29 @@
 ; RV32D-LABEL: cttz_nxv4i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vmv.v.i v9, 0
-; RV32D-NEXT:    vmseq.vv v0, v9, v8
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    vnsrl.wi v8, v10, 23
+; RV32D-NEXT:    vand.vv v9, v8, v9
+; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v9, v9, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv4i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    vnsrl.wi v8, v10, 23
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
   ret <vscale x 4 x i16> %a
@@ -814,31 +798,29 @@
 ; RV32D-LABEL: cttz_nxv8i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vmv.v.i v10, 0
-; RV32D-NEXT:    vmseq.vv v0, v10, v8
 ; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v10
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    vnsrl.wi v8, v12, 23
+; RV32D-NEXT:    vand.vv v10, v8, v10
+; RV32D-NEXT:    vfwcvt.f.xu.v v12, v10
+; RV32D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v10, v10, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v10, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv8i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vmv.v.i v10, 0
-; RV64D-NEXT:    vmseq.vv v0, v10, v8
 ; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    vnsrl.wi v8, v12, 23
+; RV64D-NEXT:    vand.vv v10, v8, v10
+; RV64D-NEXT:    vfwcvt.f.xu.v v12, v10
+; RV64D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v10, v10, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
   ret <vscale x 8 x i16> %a
@@ -905,31 +887,29 @@
 ; RV32D-LABEL: cttz_nxv16i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vmv.v.i v12, 0
-; RV32D-NEXT:    vmseq.vv v0, v12, v8
 ; RV32D-NEXT:    vrsub.vi v12, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v12
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    vnsrl.wi v8, v16, 23
+; RV32D-NEXT:    vand.vv v12, v8, v12
+; RV32D-NEXT:    vfwcvt.f.xu.v v16, v12
+; RV32D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v12, v12, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v12, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv16i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vmv.v.i v12, 0
-; RV64D-NEXT:    vmseq.vv v0, v12, v8
 ; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    vnsrl.wi v8, v16, 23
+; RV64D-NEXT:    vand.vv v12, v8, v12
+; RV64D-NEXT:    vfwcvt.f.xu.v v16, v12
+; RV64D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v12, v12, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
   ret <vscale x 16 x i16> %a
@@ -1077,20 +1057,19 @@
 ; RV64D-LABEL: cttz_nxv1i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v9, a0
+; RV64D-NEXT:    vsrl.vx v9, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
   ret <vscale x 1 x i32> %a
@@ -1177,20 +1156,19 @@
 ; RV64D-LABEL: cttz_nxv2i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vmv.v.i v9, 0
-; RV64D-NEXT:    vmseq.vv v0, v9, v8
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v10, a0
+; RV64D-NEXT:    vsrl.vx v10, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v10
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v10, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
   ret <vscale x 2 x i32> %a
@@ -1277,20 +1255,19 @@
 ; RV64D-LABEL: cttz_nxv4i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vmv.v.i v10, 0
-; RV64D-NEXT:    vmseq.vv v0, v10, v8
 ; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
+; RV64D-NEXT:    vand.vv v10, v8, v10
+; RV64D-NEXT:    vfwcvt.f.xu.v v12, v10
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v12, a0
+; RV64D-NEXT:    vsrl.vx v12, v12, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v12, v8
+; RV64D-NEXT:    vncvt.x.x.w v10, v12
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v12, a0
+; RV64D-NEXT:    vsub.vx v10, v10, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
   ret <vscale x 4 x i32> %a
@@ -1377,20 +1354,19 @@
 ; RV64D-LABEL: cttz_nxv8i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vmv.v.i v12, 0
-; RV64D-NEXT:    vmseq.vv v0, v12, v8
 ; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
+; RV64D-NEXT:    vand.vv v12, v8, v12
+; RV64D-NEXT:    vfwcvt.f.xu.v v16, v12
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v16, a0
+; RV64D-NEXT:    vsrl.vx v16, v16, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v16, v8
+; RV64D-NEXT:    vncvt.x.x.w v12, v16
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v16, a0
+; RV64D-NEXT:    vsub.vx v12, v12, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
   ret <vscale x 8 x i32> %a
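Note on the test deltas above: for scalable vectors, the splatted constant reaching SimplifySetCC is a SPLAT_VECTOR whose scalar operand is typically wider than the element type after legalization (XLen on RISC-V), so the default isConstOrConstSplat call (AllowTruncate=false) did not recognize a splat on the LHS and the "constant to the RHS" canonicalization was skipped. With AllowTruncate=true the swap fires, and the zero compare in the cttz expansion can be selected as vmseq.vi instead of materializing a zero vector with vmv.v.i + vmseq.vv. A minimal sketch of the kind of input that exercises this path (a hypothetical reproducer written by hand, not a test from this patch; the cttz tests reach the same setcc through their expansion):

; Hypothetical .ll input: an equality compare with the zero splat on the LHS.
; Compiled with something like llc -mtriple=riscv64 -mattr=+v,+d, this setcc
; should now be canonicalized to (x == 0) and select vmseq.vi v0, v8, 0.
define <vscale x 1 x i1> @cmp_zero_lhs(<vscale x 1 x i8> %x) {
  %cmp = icmp eq <vscale x 1 x i8> zeroinitializer, %x
  ret <vscale x 1 x i1> %cmp
}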