diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1,419 +1,224 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64

 define <vscale x 1 x i8> @ctlz_nxv1i8(<vscale x 1 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv1i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv1i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_nxv1i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vzext.vf4 v9, v8
-; RV32D-NEXT: vfcvt.f.xu.v v9, v9
-; RV32D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT: vnsrl.wi v9, v9, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v9, v9
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: vrsub.vx v8, v9, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv1i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vzext.vf4 v9, v8
-; RV64D-NEXT: vfcvt.f.xu.v v9, v9
-; RV64D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT: vnsrl.wi v9, v9, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v9, v9
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: vrsub.vx v8, v9, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv1i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_nxv1i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v9, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v9, v9
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v9, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
 ret <vscale x 1 x i8> %a
 }
 declare <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8>, i1)

 define <vscale x 2 x i8> @ctlz_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv2i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv2i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_nxv2i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT: vzext.vf4 v9, v8
-; RV32D-NEXT: vfcvt.f.xu.v v9, v9
-; RV32D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vnsrl.wi v9, v9, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v9, v9
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: vrsub.vx v8, v9, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv2i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT: vzext.vf4 v9, v8
-; RV64D-NEXT: vfcvt.f.xu.v v9, v9
-; RV64D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vnsrl.wi v9, v9, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v9, v9
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: vrsub.vx v8, v9, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv2i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_nxv2i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v9, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v9, v9
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v9, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
 ret <vscale x 2 x i8> %a
 }
 declare <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8>, i1)

 define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv4i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv4i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_nxv4i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT: vzext.vf4 v10, v8
-; RV32D-NEXT: vfcvt.f.xu.v v10, v10
-; RV32D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT: vnsrl.wi v9, v10, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v9, v9
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: vrsub.vx v8, v9, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv4i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT: vzext.vf4 v10, v8
-; RV64D-NEXT: vfcvt.f.xu.v v10, v10
-; RV64D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT: vnsrl.wi v9, v10, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v9, v9
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: vrsub.vx v8, v9, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv4i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_nxv4i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v10, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v10, v10
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v9, v9
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v9, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
 ret <vscale x 4 x i8> %a
 }
 declare <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8>, i1)

 define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv8i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv8i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_nxv8i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT: vzext.vf4 v12, v8
-; RV32D-NEXT: vfcvt.f.xu.v v12, v12
-; RV32D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT: vnsrl.wi v10, v12, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v9, v10
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: vrsub.vx v8, v9, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv8i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT: vzext.vf4 v12, v8
-; RV64D-NEXT: vfcvt.f.xu.v v12, v12
-; RV64D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT: vnsrl.wi v10, v12, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v9, v10
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: vrsub.vx v8, v9, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv8i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_nxv8i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v12, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v12, v12
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v9, v10
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v9, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
 ret <vscale x 8 x i8> %a
 }
 declare <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8>, i1)

 define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv16i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv16i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_nxv16i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; RV32D-NEXT: vzext.vf4 v16, v8
-; RV32D-NEXT: vfcvt.f.xu.v v16, v16
-; RV32D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT: vnsrl.wi v12, v16, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v10, v12
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: vrsub.vx v8, v10, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv16i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; RV64D-NEXT: vzext.vf4 v16, v8
-; RV64D-NEXT: vfcvt.f.xu.v v16, v16
-; RV64D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT: vnsrl.wi v12, v16, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v10, v12
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: vrsub.vx v8, v10, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv16i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_nxv16i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v16, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v16, v16
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v10, v12
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: vrsub.vx v8, v10, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
 ret <vscale x 16 x i8> %a
 }
@@ -544,29 +349,17 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv1i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v9, v9, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v9, v9, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv1i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v9, v9, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v9, v9, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv1i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v9, v9, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
 ret <vscale x 1 x i16> %a
 }
@@ -639,29 +432,17 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv2i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v9, v9, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v9, v9, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv2i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v9, v9, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v9, v9, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv2i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v9, v9, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
 ret <vscale x 2 x i16> %a
 }
@@ -734,29 +515,17 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv4i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT: vnsrl.wi v9, v10, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v9, v9, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv4i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT: vnsrl.wi v9, v10, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v9, v9, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv4i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v9, v9, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
 ret <vscale x 4 x i16> %a
 }
@@ -829,29 +598,17 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv8i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT: vnsrl.wi v10, v12, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v10, v10, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v10, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv8i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT: vnsrl.wi v10, v12, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v10, v10, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v10, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv8i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT: vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v10, v10, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
 ret <vscale x 8 x i16> %a
 }
@@ -924,29 +681,17 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv16i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT: vnsrl.wi v12, v16, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v12, v12, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v12, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv16i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT: vnsrl.wi v12, v16, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v12, v12, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v12, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv16i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT: vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v12, v12, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
 ret <vscale x 16 x i16> %a
 }
@@ -1096,37 +841,21 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv1i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32D-NEXT: vsrl.vx v9, v9, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v9, v9
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v9, v9, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 32
-; RV32D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv1i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT: vsrl.vx v9, v9, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v9, v9
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v9, v9, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 32
-; RV64D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv1i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-D-NEXT: vsrl.vx v9, v9, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v9, v9
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v9, v9, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 32
+; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
 ret <vscale x 1 x i32> %a
 }
@@ -1205,37 +934,21 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv2i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; RV32D-NEXT: vsrl.vx v10, v10, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v9, v10
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v9, v9, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 32
-; RV32D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv2i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT: vsrl.vx v10, v10, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v9, v10
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v9, v9, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 32
-; RV64D-NEXT: vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv2i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-D-NEXT: vsrl.vx v10, v10, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v9, v10
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v9, v9, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 32
+; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
 ret <vscale x 2 x i32> %a
 }
@@ -1314,37 +1027,21 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv4i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32D-NEXT: vsrl.vx v12, v12, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v10, v12
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v10, v10, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 32
-; RV32D-NEXT: vmerge.vxm v8, v10, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv4i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT: vsrl.vx v12, v12, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v10, v12
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v10, v10, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 32
-; RV64D-NEXT: vmerge.vxm v8, v10, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv4i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-D-NEXT: vsrl.vx v12, v12, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v10, v12
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v10, v10, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 32
+; CHECK-D-NEXT: vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
 ret <vscale x 4 x i32> %a
 }
@@ -1423,37 +1120,21 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_nxv8i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32D-NEXT: vsrl.vx v16, v16, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v12, v16
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v12, v12, a0
-; RV32D-NEXT: vmseq.vi v0, v8, 0
-; RV32D-NEXT: li a0, 32
-; RV32D-NEXT: vmerge.vxm v8, v12, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_nxv8i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT: vsrl.vx v16, v16, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v12, v16
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v12, v12, a0
-; RV64D-NEXT: vmseq.vi v0, v8, 0
-; RV64D-NEXT: li a0, 32
-; RV64D-NEXT: vmerge.vxm v8, v12, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_nxv8i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-D-NEXT: vsrl.vx v16, v16, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v12, v16
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v12, v12, a0
+; CHECK-D-NEXT: vmseq.vi v0, v8, 0
+; CHECK-D-NEXT: li a0, 32
+; CHECK-D-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
 ret <vscale x 8 x i32> %a
 }
@@ -1953,391 +1634,206 @@
 declare <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64>, i1)

 define <vscale x 1 x i8> @ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv1i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv1i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv1i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vzext.vf4 v9, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT: vnsrl.wi v8, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v8
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv1i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vzext.vf4 v9, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT: vnsrl.wi v8, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v8
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv1i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v8
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 true)
 ret <vscale x 1 x i8> %a
 }

 define <vscale x 2 x i8> @ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv2i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv2i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv2i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT: vzext.vf4 v9, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vnsrl.wi v8, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v8
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv2i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT: vzext.vf4 v9, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vnsrl.wi v8, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v8
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv2i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v8
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 true)
 ret <vscale x 2 x i8> %a
 }

 define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv4i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv4i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv4i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT: vzext.vf4 v10, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v10
-; RV32D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT: vnsrl.wi v10, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v10
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv4i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT: vzext.vf4 v10, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v10
-; RV64D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT: vnsrl.wi v10, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v10
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v10, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v10
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 true)
 ret <vscale x 4 x i8> %a
 }

 define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv8i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv8i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv8i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT: vzext.vf4 v12, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v12
-; RV32D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT: vnsrl.wi v12, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v12
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv8i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT: vzext.vf4 v12, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v12
-; RV64D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT: vnsrl.wi v12, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v12
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v12, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v12
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 true)
 ret <vscale x 8 x i8> %a
 }

 define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv16i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv16i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv16i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; RV32D-NEXT: vzext.vf4 v16, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v16
-; RV32D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT: vnsrl.wi v16, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v16
-; RV32D-NEXT: li a0, 134
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv16i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; RV64D-NEXT: vzext.vf4 v16, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v16
-; RV64D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT: vnsrl.wi v16, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v16
-; RV64D-NEXT: li a0, 134
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v16, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v16
+; CHECK-D-NEXT: li a0, 134
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 true)
 ret <vscale x 16 x i8> %a
 }
@@ -2465,23 +1961,14 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv1i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v8, v9, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv1i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v8, v9, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv1i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
 ret <vscale x 1 x i16> %a
 }
@@ -2553,23 +2040,14 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv2i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v8, v9, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv2i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v8, v9, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv2i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 true)
 ret <vscale x 2 x i16> %a
 }
@@ -2641,23 +2119,14 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv4i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT: vnsrl.wi v8, v10, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv4i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT: vnsrl.wi v8, v10, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv4i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 true)
 ret <vscale x 4 x i16> %a
 }
@@ -2729,23 +2198,14 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv8i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT: vnsrl.wi v8, v12, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv8i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT: vnsrl.wi v8, v12, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv8i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true)
 ret <vscale x 8 x i16> %a
 }
@@ -2817,23 +2277,14 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv16i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT: vnsrl.wi v8, v16, 23
-; RV32D-NEXT: li a0, 142
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv16i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT: vnsrl.wi v8, v16, 23
-; RV64D-NEXT: li a0, 142
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv16i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT: li a0, 142
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 true)
 ret <vscale x 16 x i16> %a
 }
@@ -2981,31 +2432,18 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv1i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32D-NEXT: vsrl.vx v8, v9, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v8
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv1i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT: vsrl.vx v8, v9, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v8
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv1i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-D-NEXT: vsrl.vx v8, v9, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v8
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 true)
 ret <vscale x 1 x i32> %a
 }
@@ -3083,31 +2521,18 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv2i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; RV32D-NEXT: vsrl.vx v8, v10, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v10, v8
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v8, v10, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv2i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT: vsrl.vx v8, v10, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v10, v8
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v8, v10, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv2i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-D-NEXT: vsrl.vx v8, v10, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v10, v8
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v8, v10, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 true)
 ret <vscale x 2 x i32> %a
 }
@@ -3185,31 +2610,18 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv4i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32D-NEXT: vsrl.vx v8, v12, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v12, v8
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v8, v12, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv4i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT: vsrl.vx v8, v12, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v12, v8
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v8, v12, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv4i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-D-NEXT: vsrl.vx v8, v12, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v12, v8
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v8, v12, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true)
 ret <vscale x 4 x i32> %a
 }
@@ -3287,31 +2699,18 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv8i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32D-NEXT: vsrl.vx v8, v16, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v16, v8
-; RV32D-NEXT: li a0, 1054
-; RV32D-NEXT: vrsub.vx v8, v16, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv8i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT: vsrl.vx v8, v16, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v16, v8
-; RV64D-NEXT: li a0, 1054
-; RV64D-NEXT: vrsub.vx v8, v16, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv8i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT: li a0, 52
+; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-D-NEXT: vsrl.vx v8, v16, a0
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v16, v8
+; CHECK-D-NEXT: li a0, 1054
+; CHECK-D-NEXT: vrsub.vx v8, v16, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 true)
 ret <vscale x 8 x i32> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1,429 +1,229 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs
< %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I -; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I -; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D +; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I +; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32,RV32D +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64,RV64D define @cttz_nxv1i8( %va) { -; RV32I-LABEL: cttz_nxv1i8: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e8, mf8, ta, mu -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: li a0, 85 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: li a0, 51 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: vand.vi v8, v8, 15 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv1i8: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e8, mf8, ta, mu -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: li a0, 85 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: li a0, 51 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: vand.vi v8, v8, 15 -; RV64I-NEXT: ret -; -; RV32D-LABEL: cttz_nxv1i8: -; RV32D: # %bb.0: -; RV32D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; RV32D-NEXT: vmv.v.i v9, 0 -; RV32D-NEXT: vmseq.vv v0, v9, v8 -; RV32D-NEXT: vrsub.vi v9, v8, 0 -; RV32D-NEXT: vand.vv v8, v8, v9 -; RV32D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV32D-NEXT: vzext.vf4 v9, v8 -; RV32D-NEXT: vfcvt.f.xu.v v8, v9 -; RV32D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV32D-NEXT: vnsrl.wi v8, v8, 23 -; RV32D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV32D-NEXT: vncvt.x.x.w v8, v8 -; RV32D-NEXT: li a0, 127 -; RV32D-NEXT: vsub.vx v8, v8, a0 -; RV32D-NEXT: vmerge.vim v8, v8, 8, v0 -; RV32D-NEXT: ret -; -; RV64D-LABEL: cttz_nxv1i8: -; RV64D: # %bb.0: -; RV64D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; RV64D-NEXT: vmv.v.i v9, 0 -; RV64D-NEXT: vmseq.vv v0, v9, v8 -; RV64D-NEXT: vrsub.vi v9, v8, 0 -; RV64D-NEXT: vand.vv v8, v8, v9 -; RV64D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64D-NEXT: vzext.vf4 v9, v8 -; RV64D-NEXT: vfcvt.f.xu.v v8, v9 -; RV64D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV64D-NEXT: vnsrl.wi v8, v8, 23 -; RV64D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV64D-NEXT: vncvt.x.x.w v8, v8 -; RV64D-NEXT: li a0, 127 -; RV64D-NEXT: vsub.vx v8, v8, a0 -; RV64D-NEXT: vmerge.vim v8, v8, 8, v0 -; RV64D-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv1i8: +; CHECK-ZVE64X: # %bb.0: 
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_nxv1i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT: vmv.v.i v9, 0
+; CHECK-D-NEXT: vmseq.vv v0, v9, v8
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v8
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
 ret <vscale x 1 x i8> %a
 }
 declare <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8>, i1)
 define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: cttz_nxv2i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv2i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_nxv2i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV32D-NEXT: vmv.v.i v9, 0
-; RV32D-NEXT: vmseq.vv v0, v9, v8
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT: vzext.vf4 v9, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vnsrl.wi v8, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v8
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv2i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV64D-NEXT: vmv.v.i v9, 0
-; RV64D-NEXT: vmseq.vv v0, v9, v8
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT: vzext.vf4 v9, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vnsrl.wi v8, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v8
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv2i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_nxv2i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT: vmv.v.i v9, 0
+; CHECK-D-NEXT: vmseq.vv v0, v9, v8
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v8
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
 ret <vscale x 2 x i8> %a
 }
 declare <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8>, i1)
 define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: cttz_nxv4i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv4i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_nxv4i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV32D-NEXT: vmv.v.i v9, 0
-; RV32D-NEXT: vmseq.vv v0, v9, v8
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT: vzext.vf4 v10, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v10
-; RV32D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT: vnsrl.wi v10, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v10
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv4i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV64D-NEXT: vmv.v.i v9, 0
-; RV64D-NEXT: vmseq.vv v0, v9, v8
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT: vzext.vf4 v10, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v10
-; RV64D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT: vnsrl.wi v10, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v10
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv4i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_nxv4i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT: vmv.v.i v9, 0
+; CHECK-D-NEXT: vmseq.vv v0, v9, v8
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v10, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v10
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
 ret <vscale x 4 x i8> %a
 }
 declare <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8>, i1)
 define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: cttz_nxv8i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, m1, ta, mu
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv8i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, m1, ta, mu
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_nxv8i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV32D-NEXT: vmv.v.i v9, 0
-; RV32D-NEXT: vmseq.vv v0, v9, v8
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT: vzext.vf4 v12, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v12
-; RV32D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT: vnsrl.wi v12, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v12
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv8i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV64D-NEXT: vmv.v.i v9, 0
-; RV64D-NEXT: vmseq.vv v0, v9, v8
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT: vzext.vf4 v12, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v12
-; RV64D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT: vnsrl.wi v12, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v12
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv8i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_nxv8i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-D-NEXT: vmv.v.i v9, 0
+; CHECK-D-NEXT: vmseq.vv v0, v9, v8
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v12, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v12
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
 ret <vscale x 8 x i8> %a
 }
 declare <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8>, i1)
 define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: cttz_nxv16i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, m2, ta, mu
-; RV32I-NEXT: vsub.vx v10, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv16i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, m2, ta, mu
-; RV64I-NEXT: vsub.vx v10, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_nxv16i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV32D-NEXT: vmv.v.i v10, 0
-; RV32D-NEXT: vmseq.vv v0, v10, v8
-; RV32D-NEXT: vrsub.vi v10, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v10
-; RV32D-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; RV32D-NEXT: vzext.vf4 v16, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v16
-; RV32D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT: vnsrl.wi v16, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v16
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv16i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV64D-NEXT: vmv.v.i v10, 0
-; RV64D-NEXT: vmseq.vv v0, v10, v8
-; RV64D-NEXT: vrsub.vi v10, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v10
-; RV64D-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; RV64D-NEXT: vzext.vf4 v16, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v16
-; RV64D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT: vnsrl.wi v16, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v16
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv16i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_nxv16i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-D-NEXT: vmv.v.i v10, 0
+; CHECK-D-NEXT: vmseq.vv v0, v10, v8
+; CHECK-D-NEXT: vrsub.vi v10, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v10
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v16, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v16
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
 ret <vscale x 16 x i8> %a
 }
@@ -538,35 +338,20 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_nxv1i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT: vmv.v.i v9, 0
-; RV32D-NEXT: vmseq.vv v0, v9, v8
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v8, v9, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv1i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT: vmv.v.i v9, 0
-; RV64D-NEXT: vmseq.vv v0, v9, v8
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v8, v9, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_nxv1i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vmv.v.i v9, 0
+; CHECK-D-NEXT: vmseq.vv v0, v9, v8
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
 ret <vscale x 1 x i16> %a
 }
@@ -629,35 +414,20 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_nxv2i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vmv.v.i v9, 0
-; RV32D-NEXT: vmseq.vv v0, v9, v8
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v8, v9, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv2i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vmv.v.i v9, 0
-; RV64D-NEXT: vmseq.vv v0, v9, v8
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v8, v9, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_nxv2i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vmv.v.i v9, 0
+; CHECK-D-NEXT: vmseq.vv v0, v9, v8
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
 ret <vscale x 2 x i16> %a
 }
@@ -720,35 +490,20 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_nxv4i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT: vmv.v.i v9, 0
-; RV32D-NEXT: vmseq.vv v0, v9, v8
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT: vnsrl.wi v8, v10, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv4i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT: vmv.v.i v9, 0
-; RV64D-NEXT: vmseq.vv v0, v9, v8
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT: vnsrl.wi v8, v10, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_nxv4i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vmv.v.i v9, 0
+; CHECK-D-NEXT: vmseq.vv v0, v9, v8
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
 ret <vscale x 4 x i16> %a
 }
@@ -811,35 +566,20 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_nxv8i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT: vmv.v.i v10, 0
-; RV32D-NEXT: vmseq.vv v0, v10, v8
-; RV32D-NEXT: vrsub.vi v10, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v10
-; RV32D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT: vnsrl.wi v8, v12, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv8i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT: vmv.v.i v10, 0
-; RV64D-NEXT: vmseq.vv v0, v10, v8
-; RV64D-NEXT: vrsub.vi v10, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v10
-; RV64D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT: vnsrl.wi v8, v12, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_nxv8i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vmv.v.i v10, 0
+; CHECK-D-NEXT: vmseq.vv v0, v10, v8
+; CHECK-D-NEXT: vrsub.vi v10, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v10
+; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
 ret <vscale x 8 x i16> %a
 }
@@ -902,35 +642,20 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_nxv16i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT: vmv.v.i v12, 0
-; RV32D-NEXT: vmseq.vv v0, v12, v8
-; RV32D-NEXT: vrsub.vi v12, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v12
-; RV32D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT: vnsrl.wi v8, v16, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: li a0, 16
-; RV32D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_nxv16i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT: vmv.v.i v12, 0
-; RV64D-NEXT: vmseq.vv v0, v12, v8
-; RV64D-NEXT: vrsub.vi v12, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v12
-; RV64D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT: vnsrl.wi v8, v16, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: li a0, 16
-; RV64D-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_nxv16i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vmv.v.i v12, 0
+; CHECK-D-NEXT: vmseq.vv v0, v12, v8
+; CHECK-D-NEXT: vrsub.vi v12, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v12
+; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: li a0, 16
+; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
 ret <vscale x 16 x i16> %a
 }
@@ -1773,415 +1498,230 @@
 ; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0)
 ; RV64-NEXT: lui a1, %hi(.LCPI21_1)
 ; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1)
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: vand.vx v16, v8, a1
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: lui a0, %hi(.LCPI21_2)
-; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0)
-; RV64-NEXT: lui a1, %hi(.LCPI21_3)
-; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1)
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
- %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false)
- ret <vscale x 8 x i64> %a
-}
-declare <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64>, i1)
-
-define <vscale x 1 x i8> @cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv1i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv1i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv1i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vzext.vf4 v9, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT: vnsrl.wi v8, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v8
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv1i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vzext.vf4 v9, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT: vnsrl.wi v8, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v8
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
- %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true)
- ret <vscale x 1 x i8> %a
-}
-
-define <vscale x 2 x i8> @cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv2i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv2i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv2i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT: vzext.vf4 v9, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vnsrl.wi v8, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v8
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv2i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT: vzext.vf4 v9, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vnsrl.wi v8, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v8
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
- %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true)
- ret <vscale x 2 x i8> %a
-}
-
-define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv4i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv4i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv4i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT: vzext.vf4 v10, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v10
-; RV32D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT: vnsrl.wi v10, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v10
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv4i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT: vzext.vf4 v10, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v10
-; RV64D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT: vnsrl.wi v10, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v10
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; RV64-NEXT: vsrl.vi v16, v8, 1
+; RV64-NEXT: vand.vx v16, v16, a0
+; RV64-NEXT: vsub.vv v8, v8, v16
+; RV64-NEXT: vand.vx v16, v8, a1
+; RV64-NEXT: vsrl.vi v8, v8, 2
+; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vadd.vv v8, v16, v8
+; RV64-NEXT: lui a0, %hi(.LCPI21_2)
+; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0)
+; RV64-NEXT: lui a1, %hi(.LCPI21_3)
+; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1)
+; RV64-NEXT: vsrl.vi v16, v8, 4
+; RV64-NEXT: vadd.vv v8, v8, v16
+; RV64-NEXT: vand.vx v8, v8, a0
+; RV64-NEXT: vmul.vx v8, v8, a1
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: vsrl.vx v8, v8, a0
+; RV64-NEXT: ret
+ %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false)
+ ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64>, i1)
+
+define <vscale x 1 x i8> @cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v8
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
+ %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true)
+ ret <vscale x 1 x i8> %a
+}
+
+define <vscale x 2 x i8> @cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv2i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v9, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v8
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
+ %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true)
+ ret <vscale x 2 x i8> %a
+}
+
+define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv4i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v10, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v10
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true)
 ret <vscale x 4 x i8> %a
 }
 define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv8i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, m1, ta, mu
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv8i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, m1, ta, mu
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv8i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT: vzext.vf4 v12, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v12
-; RV32D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT: vnsrl.wi v12, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v12
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv8i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, m1, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT: vzext.vf4 v12, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v12
-; RV64D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT: vnsrl.wi v12, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v12
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv8i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v12, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v12
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true)
 ret <vscale x 8 x i8> %a
 }
 define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv16i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e8, m2, ta, mu
-; RV32I-NEXT: vsub.vx v10, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: li a0, 85
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: li a0, 51
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: vand.vi v8, v8, 15
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv16i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e8, m2, ta, mu
-; RV64I-NEXT: vsub.vx v10, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: li a0, 85
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: li a0, 51
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: vand.vi v8, v8, 15
-; RV64I-NEXT: ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv16i8:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV32D-NEXT: vrsub.vi v10, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v10
-; RV32D-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; RV32D-NEXT: vzext.vf4 v16, v8
-; RV32D-NEXT: vfcvt.f.xu.v v8, v16
-; RV32D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT: vnsrl.wi v16, v8, 23
-; RV32D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v16
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv16i8:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e8, m2, ta, mu
-; RV64D-NEXT: vrsub.vi v10, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v10
-; RV64D-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; RV64D-NEXT: vzext.vf4 v16, v8
-; RV64D-NEXT: vfcvt.f.xu.v v8, v16
-; RV64D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT: vnsrl.wi v16, v8, 23
-; RV64D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v16
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i8:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: li a0, 85
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: li a0, 51
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv16i8:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-D-NEXT: vrsub.vi v10, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v10
+; CHECK-D-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-D-NEXT: vzext.vf4 v16, v8
+; CHECK-D-NEXT: vfcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT: vncvt.x.x.w v8, v16
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true)
 ret <vscale x 16 x i8> %a
 }
@@ -2293,27 +1833,16 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv1i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v8, v9, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv1i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v8, v9, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
 ret <vscale x 1 x i16> %a
 }
@@ -2375,27 +1904,16 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv2i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: vnsrl.wi v8, v9, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv2i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: vnsrl.wi v8, v9, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv2i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true)
 ret <vscale x 2 x i16> %a
 }
@@ -2457,27 +1975,16 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv4i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT: vnsrl.wi v8, v10, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv4i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT: vnsrl.wi v8, v10, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv4i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true)
 ret <vscale x 4 x i16> %a
 }
@@ -2539,27 +2046,16 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv8i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT: vrsub.vi v10, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v10
-; RV32D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT: vnsrl.wi v8, v12, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv8i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT: vrsub.vi v10, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v10
-; RV64D-NEXT: vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT: vnsrl.wi v8, v12, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv8i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT: vrsub.vi v10, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v10
+; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true)
 ret <vscale x 8 x i16> %a
 }
@@ -2621,27 +2117,16 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 8
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv16i16:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT: vrsub.vi v12, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v12
-; RV32D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT: vnsrl.wi v8, v16, 23
-; RV32D-NEXT: li a0, 127
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv16i16:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT: vrsub.vi v12, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v12
-; RV64D-NEXT: vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT: vnsrl.wi v8, v16, 23
-; RV64D-NEXT: li a0, 127
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv16i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT: vrsub.vi v12, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v12
+; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
 %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true)
 ret <vscale x 16 x i16> %a
 }
@@ -2765,35 +2250,20 @@
 ; RV64I-NEXT: vsrl.vi v8, v8, 24
 ; RV64I-NEXT: ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv1i32:
-; RV32D: # %bb.0:
-; RV32D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vrsub.vi v9, v8, 0
-; RV32D-NEXT: vand.vv v8, v8, v9
-; RV32D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT: li a0, 52
-; RV32D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32D-NEXT: vsrl.vx v8, v9, a0
-; RV32D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT: vncvt.x.x.w v8, v8
-; RV32D-NEXT: li a0, 1023
-; RV32D-NEXT: vsub.vx v8, v8, a0
-; RV32D-NEXT: ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv1i32:
-; RV64D: # %bb.0:
-; RV64D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vrsub.vi v9, v8, 0
-; RV64D-NEXT: vand.vv v8, v8, v9
-; RV64D-NEXT: vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT: li a0, 52
-; RV64D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT: vsrl.vx v8, v9, a0
-; RV64D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT: vncvt.x.x.w v8, v8
-; RV64D-NEXT: li a0, 1023
-; RV64D-NEXT: vsub.vx v8, v8, a0
-; RV64D-NEXT: ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i32:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-D-NEXT: vsrl.vx v8, v9, a0 +; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-D-NEXT: vncvt.x.x.w v8, v8 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 +; CHECK-D-NEXT: ret %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true) ret <vscale x 1 x i32> %a } @@ -2857,35 +2327,20 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; -; RV32D-LABEL: cttz_zero_undef_nxv2i32: -; RV32D: # %bb.0: -; RV32D-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; RV32D-NEXT: vrsub.vi v9, v8, 0 -; RV32D-NEXT: vand.vv v8, v8, v9 -; RV32D-NEXT: vfwcvt.f.xu.v v10, v8 -; RV32D-NEXT: li a0, 52 -; RV32D-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV32D-NEXT: vsrl.vx v8, v10, a0 -; RV32D-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; RV32D-NEXT: vncvt.x.x.w v10, v8 -; RV32D-NEXT: li a0, 1023 -; RV32D-NEXT: vsub.vx v8, v10, a0 -; RV32D-NEXT: ret -; -; RV64D-LABEL: cttz_zero_undef_nxv2i32: -; RV64D: # %bb.0: -; RV64D-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; RV64D-NEXT: vrsub.vi v9, v8, 0 -; RV64D-NEXT: vand.vv v8, v8, v9 -; RV64D-NEXT: vfwcvt.f.xu.v v10, v8 -; RV64D-NEXT: li a0, 52 -; RV64D-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV64D-NEXT: vsrl.vx v8, v10, a0 -; RV64D-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; RV64D-NEXT: vncvt.x.x.w v10, v8 -; RV64D-NEXT: li a0, 1023 -; RV64D-NEXT: vsub.vx v8, v10, a0 -; RV64D-NEXT: ret +; CHECK-D-LABEL: cttz_zero_undef_nxv2i32: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-D-NEXT: vrsub.vi v9, v8, 0 +; CHECK-D-NEXT: vand.vv v8, v8, v9 +; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-D-NEXT: vsrl.vx v8, v10, a0 +; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-D-NEXT: vncvt.x.x.w v10, v8 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v10, a0 +; CHECK-D-NEXT: ret %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true) ret <vscale x 2 x i32> %a } @@ -2949,35 +2404,20 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; -; RV32D-LABEL: cttz_zero_undef_nxv4i32: -; RV32D: # %bb.0: -; RV32D-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; RV32D-NEXT: vrsub.vi v10, v8, 0 -; RV32D-NEXT: vand.vv v8, v8, v10 -; RV32D-NEXT: vfwcvt.f.xu.v v12, v8 -; RV32D-NEXT: li a0, 52 -; RV32D-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32D-NEXT: vsrl.vx v8, v12, a0 -; RV32D-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32D-NEXT: vncvt.x.x.w v12, v8 -; RV32D-NEXT: li a0, 1023 -; RV32D-NEXT: vsub.vx v8, v12, a0 -; RV32D-NEXT: ret -; -; RV64D-LABEL: cttz_zero_undef_nxv4i32: -; RV64D: # %bb.0: -; RV64D-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; RV64D-NEXT: vrsub.vi v10, v8, 0 -; RV64D-NEXT: vand.vv v8, v8, v10 -; RV64D-NEXT: vfwcvt.f.xu.v v12, v8 -; RV64D-NEXT: li a0, 52 -; RV64D-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64D-NEXT: vsrl.vx v8, v12, a0 -; RV64D-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64D-NEXT: vncvt.x.x.w v12, v8 -; RV64D-NEXT: li a0, 1023 -; RV64D-NEXT: vsub.vx v8, v12, a0 -; RV64D-NEXT: ret +; CHECK-D-LABEL: cttz_zero_undef_nxv4i32: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-D-NEXT: vrsub.vi v10, v8, 0 +; CHECK-D-NEXT: vand.vv v8, v8, v10 +; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-D-NEXT: vsrl.vx v8, v12, a0 +; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-D-NEXT: vncvt.x.x.w v12, v8 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v12, a0 +; 
CHECK-D-NEXT: ret %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true) ret <vscale x 4 x i32> %a } @@ -3041,35 +2481,20 @@ ; RV64I-NEXT: vsrl.vi v8, v8, 24 ; RV64I-NEXT: ret ; -; RV32D-LABEL: cttz_zero_undef_nxv8i32: -; RV32D: # %bb.0: -; RV32D-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; RV32D-NEXT: vrsub.vi v12, v8, 0 -; RV32D-NEXT: vand.vv v8, v8, v12 -; RV32D-NEXT: vfwcvt.f.xu.v v16, v8 -; RV32D-NEXT: li a0, 52 -; RV32D-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32D-NEXT: vsrl.vx v8, v16, a0 -; RV32D-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32D-NEXT: vncvt.x.x.w v16, v8 -; RV32D-NEXT: li a0, 1023 -; RV32D-NEXT: vsub.vx v8, v16, a0 -; RV32D-NEXT: ret -; -; RV64D-LABEL: cttz_zero_undef_nxv8i32: -; RV64D: # %bb.0: -; RV64D-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; RV64D-NEXT: vrsub.vi v12, v8, 0 -; RV64D-NEXT: vand.vv v8, v8, v12 -; RV64D-NEXT: vfwcvt.f.xu.v v16, v8 -; RV64D-NEXT: li a0, 52 -; RV64D-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64D-NEXT: vsrl.vx v8, v16, a0 -; RV64D-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64D-NEXT: vncvt.x.x.w v16, v8 -; RV64D-NEXT: li a0, 1023 -; RV64D-NEXT: vsub.vx v8, v16, a0 -; RV64D-NEXT: ret +; CHECK-D-LABEL: cttz_zero_undef_nxv8i32: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-D-NEXT: vrsub.vi v12, v8, 0 +; CHECK-D-NEXT: vand.vv v8, v8, v12 +; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-D-NEXT: vsrl.vx v8, v16, a0 +; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-D-NEXT: vncvt.x.x.w v16, v8 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v16, a0 +; CHECK-D-NEXT: ret %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true) ret <vscale x 8 x i32> %a } diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I -; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I -; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbNOZbt -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbNOZbt +; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32,RV32I +; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64,RV64I +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32,RV32IZbb +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64,RV64IZbb ; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbZbt ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbZbt @@ -13,19 +13,19 @@ declare i64 @llvm.ssub.sat.i64(i64, i64) define signext i32 @func(i32 signext %x, i32 signext %y) nounwind { -; RV32I-LABEL: func: -; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: sgtz a3, a1 -; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: slt a1, a0, a2 -; RV32I-NEXT: beq a3, a1, .LBB0_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: .LBB0_2: -; RV32I-NEXT: ret +; RV32-LABEL: func: +; RV32: # %bb.0: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: sgtz a3, a1 +; RV32-NEXT: sub a0, a0, a1 +; 
RV32-NEXT: slt a1, a0, a2 +; RV32-NEXT: beq a3, a1, .LBB0_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: .LBB0_2: +; RV32-NEXT: ret ; ; RV64I-LABEL: func: ; RV64I: # %bb.0: @@ -44,20 +44,6 @@ ; RV64I-NEXT: lui a0, 524288 ; RV64I-NEXT: ret ; -; RV32IZbbNOZbt-LABEL: func: -; RV32IZbbNOZbt: # %bb.0: -; RV32IZbbNOZbt-NEXT: mv a2, a0 -; RV32IZbbNOZbt-NEXT: sgtz a3, a1 -; RV32IZbbNOZbt-NEXT: sub a0, a0, a1 -; RV32IZbbNOZbt-NEXT: slt a1, a0, a2 -; RV32IZbbNOZbt-NEXT: beq a3, a1, .LBB0_2 -; RV32IZbbNOZbt-NEXT: # %bb.1: -; RV32IZbbNOZbt-NEXT: srai a0, a0, 31 -; RV32IZbbNOZbt-NEXT: lui a1, 524288 -; RV32IZbbNOZbt-NEXT: xor a0, a0, a1 -; RV32IZbbNOZbt-NEXT: .LBB0_2: -; RV32IZbbNOZbt-NEXT: ret -; ; RV64IZbb-LABEL: func: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: sub a0, a0, a1 @@ -83,73 +69,39 @@ } define i64 @func2(i64 %x, i64 %y) nounwind { -; RV32I-LABEL: func2: -; RV32I: # %bb.0: -; RV32I-NEXT: mv a4, a1 -; RV32I-NEXT: sltu a1, a0, a2 -; RV32I-NEXT: sub a5, a4, a3 -; RV32I-NEXT: sub a1, a5, a1 -; RV32I-NEXT: xor a5, a4, a1 -; RV32I-NEXT: xor a3, a4, a3 -; RV32I-NEXT: and a3, a3, a5 -; RV32I-NEXT: bltz a3, .LBB1_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a0, a2 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srai a0, a1, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a1, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: func2: -; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: sgtz a3, a1 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: slt a1, a0, a2 -; RV64I-NEXT: beq a3, a1, .LBB1_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srai a0, a0, 63 -; RV64I-NEXT: li a1, -1 -; RV64I-NEXT: slli a1, a1, 63 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: .LBB1_2: -; RV64I-NEXT: ret -; -; RV32IZbbNOZbt-LABEL: func2: -; RV32IZbbNOZbt: # %bb.0: -; RV32IZbbNOZbt-NEXT: mv a4, a1 -; RV32IZbbNOZbt-NEXT: sltu a1, a0, a2 -; RV32IZbbNOZbt-NEXT: sub a5, a4, a3 -; RV32IZbbNOZbt-NEXT: sub a1, a5, a1 -; RV32IZbbNOZbt-NEXT: xor a5, a4, a1 -; RV32IZbbNOZbt-NEXT: xor a3, a4, a3 -; RV32IZbbNOZbt-NEXT: and a3, a3, a5 -; RV32IZbbNOZbt-NEXT: bltz a3, .LBB1_2 -; RV32IZbbNOZbt-NEXT: # %bb.1: -; RV32IZbbNOZbt-NEXT: sub a0, a0, a2 -; RV32IZbbNOZbt-NEXT: ret -; RV32IZbbNOZbt-NEXT: .LBB1_2: -; RV32IZbbNOZbt-NEXT: srai a0, a1, 31 -; RV32IZbbNOZbt-NEXT: lui a1, 524288 -; RV32IZbbNOZbt-NEXT: xor a1, a0, a1 -; RV32IZbbNOZbt-NEXT: ret +; RV32-LABEL: func2: +; RV32: # %bb.0: +; RV32-NEXT: mv a4, a1 +; RV32-NEXT: sltu a1, a0, a2 +; RV32-NEXT: sub a5, a4, a3 +; RV32-NEXT: sub a1, a5, a1 +; RV32-NEXT: xor a5, a4, a1 +; RV32-NEXT: xor a3, a4, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: bltz a3, .LBB1_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: sub a0, a0, a2 +; RV32-NEXT: ret +; RV32-NEXT: .LBB1_2: +; RV32-NEXT: srai a0, a1, 31 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: xor a1, a0, a1 +; RV32-NEXT: ret ; -; RV64IZbbNOZbt-LABEL: func2: -; RV64IZbbNOZbt: # %bb.0: -; RV64IZbbNOZbt-NEXT: mv a2, a0 -; RV64IZbbNOZbt-NEXT: sgtz a3, a1 -; RV64IZbbNOZbt-NEXT: sub a0, a0, a1 -; RV64IZbbNOZbt-NEXT: slt a1, a0, a2 -; RV64IZbbNOZbt-NEXT: beq a3, a1, .LBB1_2 -; RV64IZbbNOZbt-NEXT: # %bb.1: -; RV64IZbbNOZbt-NEXT: srai a0, a0, 63 -; RV64IZbbNOZbt-NEXT: li a1, -1 -; RV64IZbbNOZbt-NEXT: slli a1, a1, 63 -; RV64IZbbNOZbt-NEXT: xor a0, a0, a1 -; RV64IZbbNOZbt-NEXT: .LBB1_2: -; RV64IZbbNOZbt-NEXT: ret +; RV64-LABEL: func2: +; RV64: # %bb.0: +; RV64-NEXT: mv a2, a0 +; RV64-NEXT: sgtz a3, a1 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: slt a1, a0, a2 +; RV64-NEXT: beq a3, a1, .LBB1_2 
+; RV64-NEXT: # %bb.1: +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: .LBB1_2: +; RV64-NEXT: ret ; ; RV32IZbbZbt-LABEL: func2: ; RV32IZbbZbt: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I -; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I -; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbNOZbt -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbNOZbt +; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32,RV32I +; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64,RV64I +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32,RV32IZbb +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64,RV64IZbb ; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbZbt ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbZbt @@ -13,20 +13,20 @@ declare i64 @llvm.ssub.sat.i64(i64, i64) define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { -; RV32I-LABEL: func32: -; RV32I: # %bb.0: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mul a0, a1, a2 -; RV32I-NEXT: sgtz a1, a0 -; RV32I-NEXT: sub a0, a3, a0 -; RV32I-NEXT: slt a2, a0, a3 -; RV32I-NEXT: beq a1, a2, .LBB0_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: .LBB0_2: -; RV32I-NEXT: ret +; RV32-LABEL: func32: +; RV32: # %bb.0: +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: mul a0, a1, a2 +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: sub a0, a3, a0 +; RV32-NEXT: slt a2, a0, a3 +; RV32-NEXT: beq a1, a2, .LBB0_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: .LBB0_2: +; RV32-NEXT: ret ; ; RV64I-LABEL: func32: ; RV64I: # %bb.0: @@ -47,21 +47,6 @@ ; RV64I-NEXT: lui a0, 524288 ; RV64I-NEXT: ret ; -; RV32IZbbNOZbt-LABEL: func32: -; RV32IZbbNOZbt: # %bb.0: -; RV32IZbbNOZbt-NEXT: mv a3, a0 -; RV32IZbbNOZbt-NEXT: mul a0, a1, a2 -; RV32IZbbNOZbt-NEXT: sgtz a1, a0 -; RV32IZbbNOZbt-NEXT: sub a0, a3, a0 -; RV32IZbbNOZbt-NEXT: slt a2, a0, a3 -; RV32IZbbNOZbt-NEXT: beq a1, a2, .LBB0_2 -; RV32IZbbNOZbt-NEXT: # %bb.1: -; RV32IZbbNOZbt-NEXT: srai a0, a0, 31 -; RV32IZbbNOZbt-NEXT: lui a1, 524288 -; RV32IZbbNOZbt-NEXT: xor a0, a0, a1 -; RV32IZbbNOZbt-NEXT: .LBB0_2: -; RV32IZbbNOZbt-NEXT: ret -; ; RV64IZbb-LABEL: func32: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: sext.w a0, a0 @@ -91,73 +76,39 @@ } define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { -; RV32I-LABEL: func64: -; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: sltu a1, a0, a4 -; RV32I-NEXT: sub a3, a2, a5 -; RV32I-NEXT: sub a1, a3, a1 -; RV32I-NEXT: xor a3, a2, a1 -; RV32I-NEXT: xor a2, a2, a5 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: bltz a2, .LBB1_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a0, a4 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srai a0, a1, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: 
xor a1, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: func64: -; RV64I: # %bb.0: -; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: sgtz a3, a2 -; RV64I-NEXT: sub a0, a0, a2 -; RV64I-NEXT: slt a1, a0, a1 -; RV64I-NEXT: beq a3, a1, .LBB1_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srai a0, a0, 63 -; RV64I-NEXT: li a1, -1 -; RV64I-NEXT: slli a1, a1, 63 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: .LBB1_2: -; RV64I-NEXT: ret -; -; RV32IZbbNOZbt-LABEL: func64: -; RV32IZbbNOZbt: # %bb.0: -; RV32IZbbNOZbt-NEXT: mv a2, a1 -; RV32IZbbNOZbt-NEXT: sltu a1, a0, a4 -; RV32IZbbNOZbt-NEXT: sub a3, a2, a5 -; RV32IZbbNOZbt-NEXT: sub a1, a3, a1 -; RV32IZbbNOZbt-NEXT: xor a3, a2, a1 -; RV32IZbbNOZbt-NEXT: xor a2, a2, a5 -; RV32IZbbNOZbt-NEXT: and a2, a2, a3 -; RV32IZbbNOZbt-NEXT: bltz a2, .LBB1_2 -; RV32IZbbNOZbt-NEXT: # %bb.1: -; RV32IZbbNOZbt-NEXT: sub a0, a0, a4 -; RV32IZbbNOZbt-NEXT: ret -; RV32IZbbNOZbt-NEXT: .LBB1_2: -; RV32IZbbNOZbt-NEXT: srai a0, a1, 31 -; RV32IZbbNOZbt-NEXT: lui a1, 524288 -; RV32IZbbNOZbt-NEXT: xor a1, a0, a1 -; RV32IZbbNOZbt-NEXT: ret +; RV32-LABEL: func64: +; RV32: # %bb.0: +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: sltu a1, a0, a4 +; RV32-NEXT: sub a3, a2, a5 +; RV32-NEXT: sub a1, a3, a1 +; RV32-NEXT: xor a3, a2, a1 +; RV32-NEXT: xor a2, a2, a5 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: bltz a2, .LBB1_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: sub a0, a0, a4 +; RV32-NEXT: ret +; RV32-NEXT: .LBB1_2: +; RV32-NEXT: srai a0, a1, 31 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: xor a1, a0, a1 +; RV32-NEXT: ret ; -; RV64IZbbNOZbt-LABEL: func64: -; RV64IZbbNOZbt: # %bb.0: -; RV64IZbbNOZbt-NEXT: mv a1, a0 -; RV64IZbbNOZbt-NEXT: sgtz a3, a2 -; RV64IZbbNOZbt-NEXT: sub a0, a0, a2 -; RV64IZbbNOZbt-NEXT: slt a1, a0, a1 -; RV64IZbbNOZbt-NEXT: beq a3, a1, .LBB1_2 -; RV64IZbbNOZbt-NEXT: # %bb.1: -; RV64IZbbNOZbt-NEXT: srai a0, a0, 63 -; RV64IZbbNOZbt-NEXT: li a1, -1 -; RV64IZbbNOZbt-NEXT: slli a1, a1, 63 -; RV64IZbbNOZbt-NEXT: xor a0, a0, a1 -; RV64IZbbNOZbt-NEXT: .LBB1_2: -; RV64IZbbNOZbt-NEXT: ret +; RV64-LABEL: func64: +; RV64: # %bb.0: +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: sgtz a3, a2 +; RV64-NEXT: sub a0, a0, a2 +; RV64-NEXT: slt a1, a0, a1 +; RV64-NEXT: beq a3, a1, .LBB1_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: .LBB1_2: +; RV64-NEXT: ret ; ; RV32IZbbZbt-LABEL: func64: ; RV32IZbbZbt: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll --- a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll @@ -1,46 +1,31 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,RV32,RV32I +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV32,RV32I ; RUN: llc -mtriple=riscv64 < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,RV64,RV64I +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV64,RV64I ; RUN: llc -mtriple=riscv32 -mattr=+zbb < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZBB +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV32,RV32ZBB ; RUN: llc -mtriple=riscv64 -mattr=+zbb < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZBB +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV64,RV64ZBB ; TODO: Should we convert these to X ^ ((X ^ Y) & M) form when Zbb isn't ; 
present? define i8 @out8(i8 %x, i8 %y, i8 %mask) { -; RV32I-LABEL: out8: -; RV32I: # %bb.0: -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: out8: -; RV64I: # %bb.0: -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: not a2, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: out8: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: out8: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: out8: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: not a2, a2 +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: or a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: out8: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %mx = and i8 %x, %mask %notmask = xor i8 %mask, -1 %my = and i8 %y, %notmask @@ -49,35 +34,20 @@ } define i16 @out16(i16 %x, i16 %y, i16 %mask) { -; RV32I-LABEL: out16: -; RV32I: # %bb.0: -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: out16: -; RV64I: # %bb.0: -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: not a2, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: out16: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: out16: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: out16: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: not a2, a2 +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: or a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: out16: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %mx = and i16 %x, %mask %notmask = xor i16 %mask, -1 %my = and i16 %y, %notmask @@ -86,35 +56,20 @@ } define i32 @out32(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: out32: -; RV32I: # %bb.0: -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: out32: -; RV64I: # %bb.0: -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: not a2, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: out32: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: out32: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: out32: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: not a2, a2 +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: or a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: out32: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %mx = and i32 %x, %mask %notmask = xor i32 %mask, 
-1 %my = and i32 %y, %notmask @@ -172,33 +127,19 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; define i8 @in8(i8 %x, i8 %y, i8 %mask) { -; RV32I-LABEL: in8: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in8: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in8: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in8: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in8: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in8: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %n0 = xor i8 %x, %y %n1 = and i8 %n0, %mask %r = xor i8 %n1, %y @@ -206,33 +147,19 @@ } define i16 @in16(i16 %x, i16 %y, i16 %mask) { -; RV32I-LABEL: in16: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in16: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in16: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in16: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in16: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in16: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %n0 = xor i16 %x, %y %n1 = and i16 %n0, %mask %r = xor i16 %n1, %y @@ -240,33 +167,19 @@ } define i32 @in32(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in32: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in32: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in32: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in32: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in32: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in32: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask %r = xor i32 %n1, %y @@ -318,33 +231,19 @@ ; ============================================================================ ; define i32 
@in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_commutativity_0_0_1: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a2, a0 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_commutativity_0_0_1: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a2, a0 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_commutativity_0_0_1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_commutativity_0_0_1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_commutativity_0_0_1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a2, a0 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_commutativity_0_0_1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped %r = xor i32 %n1, %y @@ -352,33 +251,19 @@ } define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_commutativity_0_1_0: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a1, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_commutativity_0_1_0: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_commutativity_0_1_0: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_commutativity_0_1_0: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_commutativity_0_1_0: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a1, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_commutativity_0_1_0: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask %r = xor i32 %y, %n1 ; swapped @@ -386,33 +271,19 @@ } define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_commutativity_0_1_1: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a2, a0 -; RV32I-NEXT: xor a0, a1, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_commutativity_0_1_1: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a2, a0 -; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_commutativity_0_1_1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_commutativity_0_1_1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_commutativity_0_1_1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a2, a0 +; CHECK-I-NEXT: xor a0, a1, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_commutativity_0_1_1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: 
and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped %r = xor i32 %y, %n1 ; swapped @@ -420,33 +291,19 @@ } define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_commutativity_1_0_0: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a1, a0, a1 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a0, a1, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_commutativity_1_0_0: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a1, a0, a1 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_commutativity_1_0_0: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_commutativity_1_0_0: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_commutativity_1_0_0: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a1, a0, a1 +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a0, a1, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_commutativity_1_0_0: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a0, a2 +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a1, a0 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask %r = xor i32 %n1, %x ; %x instead of %y @@ -454,33 +311,19 @@ } define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_commutativity_1_0_1: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a1, a0, a1 -; RV32I-NEXT: and a1, a2, a1 -; RV32I-NEXT: xor a0, a1, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_commutativity_1_0_1: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a1, a0, a1 -; RV64I-NEXT: and a1, a2, a1 -; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_commutativity_1_0_1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_commutativity_1_0_1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_commutativity_1_0_1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a1, a0, a1 +; CHECK-I-NEXT: and a1, a2, a1 +; CHECK-I-NEXT: xor a0, a1, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_commutativity_1_0_1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a0, a2 +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a1, a0 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped %r = xor i32 %n1, %x ; %x instead of %y @@ -488,33 +331,19 @@ } define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_commutativity_1_1_0: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a1, a0, a1 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_commutativity_1_1_0: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a1, a0, a1 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_commutativity_1_1_0: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_commutativity_1_1_0: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_commutativity_1_1_0: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor 
a1, a0, a1 +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_commutativity_1_1_0: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a0, a2 +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a1, a0 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask %r = xor i32 %x, %n1 ; swapped, %x instead of %y @@ -522,33 +351,19 @@ } define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_commutativity_1_1_1: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a1, a0, a1 -; RV32I-NEXT: and a1, a2, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_commutativity_1_1_1: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a1, a0, a1 -; RV64I-NEXT: and a1, a2, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_commutativity_1_1_1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_commutativity_1_1_1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_commutativity_1_1_1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a1, a0, a1 +; CHECK-I-NEXT: and a1, a2, a1 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_commutativity_1_1_1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a0, a2 +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a1, a0 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped %r = xor i32 %x, %n1 ; swapped, %x instead of %y @@ -560,37 +375,21 @@ ; ============================================================================ ; define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { -; RV32I-LABEL: in_complex_y0: -; RV32I: # %bb.0: -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_y0: -; RV64I: # %bb.0: -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_y0: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a3 -; RV32ZBB-NEXT: andn a1, a1, a3 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_y0: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a3 -; RV64ZBB-NEXT: andn a1, a1, a3 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_complex_y0: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a3 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_y0: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a3 +; CHECK-ZBB-NEXT: andn a1, a1, a3 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -599,37 +398,21 @@ } define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { -; RV32I-LABEL: in_complex_y1: -; RV32I: # %bb.0: -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: xor a0, a1, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_y1: -; RV64I: # %bb.0: -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a3 -; 
RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_y1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a3 -; RV32ZBB-NEXT: andn a1, a1, a3 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_y1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a3 -; RV64ZBB-NEXT: andn a1, a1, a3 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_complex_y1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a3 +; CHECK-I-NEXT: xor a0, a1, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_y1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a3 +; CHECK-ZBB-NEXT: andn a1, a1, a3 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -642,123 +425,73 @@ ; ============================================================================ ; define i32 @in_complex_m0(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { -; RV32I-LABEL: in_complex_m0: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a2, a2, a3 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_m0: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a2, a2, a3 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_m0: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: xor a2, a2, a3 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_m0: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: xor a2, a2, a3 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_complex_m0: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a2, a2, a3 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_m0: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: xor a2, a2, a3 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %mask = xor i32 %m_a, %m_b %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask %r = xor i32 %n1, %y - ret i32 %r -} - -define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { -; RV32I-LABEL: in_complex_m1: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a2, a2, a3 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a2, a0 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_m1: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a2, a2, a3 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a2, a0 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_m1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: xor a2, a2, a3 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_m1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: xor a2, a2, a3 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret - %mask = xor i32 %m_a, %m_b - %n0 = xor i32 %x, %y - %n1 = and i32 %mask, %n0 - %r = xor i32 %n1, %y - ret i32 %r -} - -; ============================================================================ ; -; Both Y and M are complex. 
-; ============================================================================ ; - -define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { -; RV32I-LABEL: in_complex_y0_m0: -; RV32I: # %bb.0: -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a2, a3, a4 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_y0_m0: -; RV64I: # %bb.0: -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a2, a3, a4 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_y0_m0: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: xor a2, a3, a4 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_y0_m0: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: xor a2, a3, a4 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret + ret i32 %r +} + +define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { +; CHECK-I-LABEL: in_complex_m1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xor a2, a2, a3 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a2, a0 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_m1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: xor a2, a2, a3 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 + %r = xor i32 %n1, %y + ret i32 %r +} + +; ============================================================================ ; +; Both Y and M are complex. 
+; ============================================================================ ; + +define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { +; CHECK-I-LABEL: in_complex_y0_m0: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a2, a3, a4 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_y0_m0: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: xor a2, a3, a4 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b %n0 = xor i32 %x, %y @@ -768,41 +501,23 @@ } define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { -; RV32I-LABEL: in_complex_y1_m0: -; RV32I: # %bb.0: -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a2, a3, a4 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a1, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_y1_m0: -; RV64I: # %bb.0: -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a2, a3, a4 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_y1_m0: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: xor a2, a3, a4 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_y1_m0: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: xor a2, a3, a4 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_complex_y1_m0: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a2, a3, a4 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a1, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_y1_m0: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: xor a2, a3, a4 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b %n0 = xor i32 %x, %y @@ -812,41 +527,23 @@ } define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { -; RV32I-LABEL: in_complex_y0_m1: -; RV32I: # %bb.0: -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a2, a3, a4 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a2, a0 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_y0_m1: -; RV64I: # %bb.0: -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a2, a3, a4 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a2, a0 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_y0_m1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: xor a2, a3, a4 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_y0_m1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: xor a2, a3, a4 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_complex_y0_m1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a2, a3, a4 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, 
a2, a0 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_y0_m1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: xor a2, a3, a4 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b %n0 = xor i32 %x, %y @@ -856,41 +553,23 @@ } define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { -; RV32I-LABEL: in_complex_y1_m1: -; RV32I: # %bb.0: -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: xor a2, a3, a4 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: and a0, a2, a0 -; RV32I-NEXT: xor a0, a1, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_complex_y1_m1: -; RV64I: # %bb.0: -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: xor a2, a3, a4 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: and a0, a2, a0 -; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_complex_y1_m1: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: xor a2, a3, a4 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_complex_y1_m1: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: xor a2, a3, a4 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_complex_y1_m1: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: and a1, a1, a2 +; CHECK-I-NEXT: xor a2, a3, a4 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: and a0, a2, a0 +; CHECK-I-NEXT: xor a0, a1, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_complex_y1_m1: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a1, a1, a2 +; CHECK-ZBB-NEXT: xor a2, a3, a4 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: and a0, a0, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b %n0 = xor i32 %x, %y @@ -904,31 +583,18 @@ ; ============================================================================ ; define i32 @out_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: out_constant_varx_mone: -; RV32I: # %bb.0: -; RV32I-NEXT: not a1, a2 -; RV32I-NEXT: and a0, a2, a0 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: out_constant_varx_mone: -; RV64I: # %bb.0: -; RV64I-NEXT: not a1, a2 -; RV64I-NEXT: and a0, a2, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: out_constant_varx_mone: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a2, a0 -; RV32ZBB-NEXT: orn a0, a0, a2 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: out_constant_varx_mone: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a2, a0 -; RV64ZBB-NEXT: orn a0, a0, a2 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: out_constant_varx_mone: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: not a1, a2 +; CHECK-I-NEXT: and a0, a2, a0 +; CHECK-I-NEXT: or a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: out_constant_varx_mone: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a0, a2, a0 +; CHECK-ZBB-NEXT: orn a0, a0, a2 +; CHECK-ZBB-NEXT: ret %notmask = xor i32 %mask, -1 %mx = and i32 %mask, %x %my = and i32 %notmask, -1 @@ -937,31 +603,18 @@ } define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_constant_varx_mone: -; RV32I: # %bb.0: -; RV32I-NEXT: not a0, a0 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: not a0, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_constant_varx_mone: -; RV64I: # %bb.0: -; RV64I-NEXT: not a0, a0 -; 
RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_constant_varx_mone: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a2, a0 -; RV32ZBB-NEXT: not a0, a0 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_constant_varx_mone: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a2, a0 -; RV64ZBB-NEXT: not a0, a0 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_constant_varx_mone: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: not a0, a0 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: not a0, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_constant_varx_mone: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a2, a0 +; CHECK-ZBB-NEXT: not a0, a0 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, -1 ; %x %n1 = and i32 %n0, %mask %r = xor i32 %n1, -1 @@ -983,35 +636,20 @@ ; This is not a canonical form. Testing for completeness only. define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_constant_varx_mone_invmask: -; RV32I: # %bb.0: -; RV32I-NEXT: not a1, a2 -; RV32I-NEXT: not a0, a0 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: not a0, a0 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_constant_varx_mone_invmask: -; RV64I: # %bb.0: -; RV64I-NEXT: not a1, a2 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_constant_varx_mone_invmask: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: not a0, a0 -; RV32ZBB-NEXT: andn a0, a0, a2 -; RV32ZBB-NEXT: not a0, a0 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_constant_varx_mone_invmask: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: not a0, a0 -; RV64ZBB-NEXT: andn a0, a0, a2 -; RV64ZBB-NEXT: not a0, a0 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_constant_varx_mone_invmask: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: not a1, a2 +; CHECK-I-NEXT: not a0, a0 +; CHECK-I-NEXT: and a0, a0, a1 +; CHECK-I-NEXT: not a0, a0 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_constant_varx_mone_invmask: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: not a0, a0 +; CHECK-ZBB-NEXT: andn a0, a0, a2 +; CHECK-ZBB-NEXT: not a0, a0 +; CHECK-ZBB-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, -1 ; %x %n1 = and i32 %n0, %notmask @@ -1020,37 +658,21 @@ } define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: out_constant_varx_42: -; RV32I: # %bb.0: -; RV32I-NEXT: not a1, a2 -; RV32I-NEXT: and a0, a2, a0 -; RV32I-NEXT: andi a1, a1, 42 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: out_constant_varx_42: -; RV64I: # %bb.0: -; RV64I-NEXT: not a1, a2 -; RV64I-NEXT: and a0, a2, a0 -; RV64I-NEXT: andi a1, a1, 42 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: out_constant_varx_42: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a2, a0 -; RV32ZBB-NEXT: li a1, 42 -; RV32ZBB-NEXT: andn a1, a1, a2 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: out_constant_varx_42: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a2, a0 -; RV64ZBB-NEXT: li a1, 42 -; RV64ZBB-NEXT: andn a1, a1, a2 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: out_constant_varx_42: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: not a1, a2 +; CHECK-I-NEXT: and a0, a2, a0 +; CHECK-I-NEXT: andi a1, a1, 42 +; CHECK-I-NEXT: or a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: out_constant_varx_42: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: and a0, a2, a0 +; CHECK-ZBB-NEXT: li a1, 42 +; CHECK-ZBB-NEXT: andn a1, a1, a2 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %notmask = xor i32 %mask, -1 %mx = and i32 %mask, %x %my = and i32 
%notmask, 42 @@ -1059,33 +681,19 @@ } define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_constant_varx_42: -; RV32I: # %bb.0: -; RV32I-NEXT: xori a0, a0, 42 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xori a0, a0, 42 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_constant_varx_42: -; RV64I: # %bb.0: -; RV64I-NEXT: xori a0, a0, 42 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xori a0, a0, 42 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_constant_varx_42: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a2, a0 -; RV32ZBB-NEXT: ori a1, a2, 42 -; RV32ZBB-NEXT: andn a0, a1, a0 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_constant_varx_42: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a2, a0 -; RV64ZBB-NEXT: ori a1, a2, 42 -; RV64ZBB-NEXT: andn a0, a1, a0 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_constant_varx_42: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: xori a0, a0, 42 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xori a0, a0, 42 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_constant_varx_42: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a2, a0 +; CHECK-ZBB-NEXT: ori a1, a2, 42 +; CHECK-ZBB-NEXT: andn a0, a1, a0 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 %x, 42 ; %x %n1 = and i32 %n0, %mask %r = xor i32 %n1, 42 @@ -1094,35 +702,20 @@ ; This is not a canonical form. Testing for completeness only. define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: out_constant_varx_42_invmask: -; RV32I: # %bb.0: -; RV32I-NEXT: not a1, a2 -; RV32I-NEXT: and a0, a1, a0 -; RV32I-NEXT: andi a1, a2, 42 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: out_constant_varx_42_invmask: -; RV64I: # %bb.0: -; RV64I-NEXT: not a1, a2 -; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: andi a1, a2, 42 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: out_constant_varx_42_invmask: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a0, a2 -; RV32ZBB-NEXT: andi a1, a2, 42 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: out_constant_varx_42_invmask: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a0, a2 -; RV64ZBB-NEXT: andi a1, a2, 42 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: out_constant_varx_42_invmask: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: not a1, a2 +; CHECK-I-NEXT: and a0, a1, a0 +; CHECK-I-NEXT: andi a1, a2, 42 +; CHECK-I-NEXT: or a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: out_constant_varx_42_invmask: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a0, a2 +; CHECK-ZBB-NEXT: andi a1, a2, 42 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %notmask = xor i32 %mask, -1 %mx = and i32 %notmask, %x %my = and i32 %mask, 42 @@ -1132,35 +725,20 @@ ; This is not a canonical form. Testing for completeness only. 
define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_constant_varx_42_invmask: -; RV32I: # %bb.0: -; RV32I-NEXT: not a1, a2 -; RV32I-NEXT: xori a0, a0, 42 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: xori a0, a0, 42 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_constant_varx_42_invmask: -; RV64I: # %bb.0: -; RV64I-NEXT: not a1, a2 -; RV64I-NEXT: xori a0, a0, 42 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: xori a0, a0, 42 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_constant_varx_42_invmask: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a0, a2 -; RV32ZBB-NEXT: andi a1, a2, 42 -; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_constant_varx_42_invmask: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a0, a2 -; RV64ZBB-NEXT: andi a1, a2, 42 -; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_constant_varx_42_invmask: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: not a1, a2 +; CHECK-I-NEXT: xori a0, a0, 42 +; CHECK-I-NEXT: and a0, a0, a1 +; CHECK-I-NEXT: xori a0, a0, 42 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_constant_varx_42_invmask: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a0, a2 +; CHECK-ZBB-NEXT: andi a1, a2, 42 +; CHECK-ZBB-NEXT: or a0, a0, a1 +; CHECK-ZBB-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, 42 ; %x %n1 = and i32 %n0, %notmask @@ -1181,31 +759,18 @@ } define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { -; RV32I-LABEL: in_constant_mone_vary: -; RV32I: # %bb.0: -; RV32I-NEXT: not a0, a1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: in_constant_mone_vary: -; RV64I: # %bb.0: -; RV64I-NEXT: not a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: in_constant_mone_vary: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andn a0, a2, a1 -; RV32ZBB-NEXT: xor a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: in_constant_mone_vary: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andn a0, a2, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 -; RV64ZBB-NEXT: ret +; CHECK-I-LABEL: in_constant_mone_vary: +; CHECK-I: # %bb.0: +; CHECK-I-NEXT: not a0, a1 +; CHECK-I-NEXT: and a0, a0, a2 +; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: ret +; +; CHECK-ZBB-LABEL: in_constant_mone_vary: +; CHECK-ZBB: # %bb.0: +; CHECK-ZBB-NEXT: andn a0, a2, a1 +; CHECK-ZBB-NEXT: xor a0, a0, a1 +; CHECK-ZBB-NEXT: ret %n0 = xor i32 -1, %y ; %x %n1 = and i32 %n0, %mask %r = xor i32 %n1, %y @@ -1214,31 +779,18 @@ ; This is not a canonical form. Testing for completeness only. 
 define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_mone_vary_invmask:
-; RV32I: # %bb.0:
-; RV32I-NEXT: not a0, a2
-; RV32I-NEXT: and a1, a2, a1
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: out_constant_mone_vary_invmask:
-; RV64I: # %bb.0:
-; RV64I-NEXT: not a0, a2
-; RV64I-NEXT: and a1, a2, a1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBB-LABEL: out_constant_mone_vary_invmask:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: and a0, a2, a1
-; RV32ZBB-NEXT: orn a0, a0, a2
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: out_constant_mone_vary_invmask:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: and a0, a2, a1
-; RV64ZBB-NEXT: orn a0, a0, a2
-; RV64ZBB-NEXT: ret
+; CHECK-I-LABEL: out_constant_mone_vary_invmask:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: not a0, a2
+; CHECK-I-NEXT: and a1, a2, a1
+; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: out_constant_mone_vary_invmask:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a0, a2, a1
+; CHECK-ZBB-NEXT: orn a0, a0, a2
+; CHECK-ZBB-NEXT: ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %notmask, -1
   %my = and i32 %mask, %y
@@ -1248,35 +800,20 @@
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_mone_vary_invmask:
-; RV32I: # %bb.0:
-; RV32I-NEXT: not a0, a2
-; RV32I-NEXT: not a2, a1
-; RV32I-NEXT: and a0, a2, a0
-; RV32I-NEXT: xor a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: in_constant_mone_vary_invmask:
-; RV64I: # %bb.0:
-; RV64I-NEXT: not a0, a2
-; RV64I-NEXT: not a2, a1
-; RV64I-NEXT: and a0, a2, a0
-; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBB-LABEL: in_constant_mone_vary_invmask:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: not a0, a1
-; RV32ZBB-NEXT: andn a0, a0, a2
-; RV32ZBB-NEXT: xor a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: in_constant_mone_vary_invmask:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: not a0, a1
-; RV64ZBB-NEXT: andn a0, a0, a2
-; RV64ZBB-NEXT: xor a0, a0, a1
-; RV64ZBB-NEXT: ret
+; CHECK-I-LABEL: in_constant_mone_vary_invmask:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: not a0, a2
+; CHECK-I-NEXT: not a2, a1
+; CHECK-I-NEXT: and a0, a2, a0
+; CHECK-I-NEXT: xor a0, a0, a1
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: in_constant_mone_vary_invmask:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: not a0, a1
+; CHECK-ZBB-NEXT: andn a0, a0, a2
+; CHECK-ZBB-NEXT: xor a0, a0, a1
+; CHECK-ZBB-NEXT: ret
   %notmask = xor i32 %mask, -1
   %n0 = xor i32 -1, %y ; %x
   %n1 = and i32 %n0, %notmask
@@ -1285,35 +822,20 @@
 }
 
 define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_42_vary:
-; RV32I: # %bb.0:
-; RV32I-NEXT: not a0, a2
-; RV32I-NEXT: andi a2, a2, 42
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: out_constant_42_vary:
-; RV64I: # %bb.0:
-; RV64I-NEXT: not a0, a2
-; RV64I-NEXT: andi a2, a2, 42
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: or a0, a2, a0
-; RV64I-NEXT: ret
-;
-; RV32ZBB-LABEL: out_constant_42_vary:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: andi a0, a2, 42
-; RV32ZBB-NEXT: andn a1, a1, a2
-; RV32ZBB-NEXT: or a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: out_constant_42_vary:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: andi a0, a2, 42
-; RV64ZBB-NEXT: andn a1, a1, a2
-; RV64ZBB-NEXT: or a0, a0, a1
-; RV64ZBB-NEXT: ret
+; CHECK-I-LABEL: out_constant_42_vary:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: not a0, a2
+; CHECK-I-NEXT: andi a2, a2, 42
+; CHECK-I-NEXT: and a0, a0, a1
+; CHECK-I-NEXT: or a0, a2, a0
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: out_constant_42_vary:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: andi a0, a2, 42
+; CHECK-ZBB-NEXT: andn a1, a1, a2
+; CHECK-ZBB-NEXT: or a0, a0, a1
+; CHECK-ZBB-NEXT: ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %mask, 42
   %my = and i32 %notmask, %y
@@ -1322,33 +844,19 @@
 }
 
 define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_42_vary:
-; RV32I: # %bb.0:
-; RV32I-NEXT: xori a0, a1, 42
-; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: xor a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: in_constant_42_vary:
-; RV64I: # %bb.0:
-; RV64I-NEXT: xori a0, a1, 42
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBB-LABEL: in_constant_42_vary:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: andn a0, a1, a2
-; RV32ZBB-NEXT: andi a1, a2, 42
-; RV32ZBB-NEXT: or a0, a1, a0
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: in_constant_42_vary:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: andn a0, a1, a2
-; RV64ZBB-NEXT: andi a1, a2, 42
-; RV64ZBB-NEXT: or a0, a1, a0
-; RV64ZBB-NEXT: ret
+; CHECK-I-LABEL: in_constant_42_vary:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: xori a0, a1, 42
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: xor a0, a0, a1
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: in_constant_42_vary:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: andn a0, a1, a2
+; CHECK-ZBB-NEXT: andi a1, a2, 42
+; CHECK-ZBB-NEXT: or a0, a1, a0
+; CHECK-ZBB-NEXT: ret
   %n0 = xor i32 42, %y ; %x
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, %y
@@ -1357,37 +865,21 @@
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_42_vary_invmask:
-; RV32I: # %bb.0:
-; RV32I-NEXT: not a0, a2
-; RV32I-NEXT: andi a0, a0, 42
-; RV32I-NEXT: and a1, a2, a1
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: out_constant_42_vary_invmask:
-; RV64I: # %bb.0:
-; RV64I-NEXT: not a0, a2
-; RV64I-NEXT: andi a0, a0, 42
-; RV64I-NEXT: and a1, a2, a1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBB-LABEL: out_constant_42_vary_invmask:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: li a0, 42
-; RV32ZBB-NEXT: andn a0, a0, a2
-; RV32ZBB-NEXT: and a1, a2, a1
-; RV32ZBB-NEXT: or a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: out_constant_42_vary_invmask:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: li a0, 42
-; RV64ZBB-NEXT: andn a0, a0, a2
-; RV64ZBB-NEXT: and a1, a2, a1
-; RV64ZBB-NEXT: or a0, a0, a1
-; RV64ZBB-NEXT: ret
+; CHECK-I-LABEL: out_constant_42_vary_invmask:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: not a0, a2
+; CHECK-I-NEXT: andi a0, a0, 42
+; CHECK-I-NEXT: and a1, a2, a1
+; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: out_constant_42_vary_invmask:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: li a0, 42
+; CHECK-ZBB-NEXT: andn a0, a0, a2
+; CHECK-ZBB-NEXT: and a1, a2, a1
+; CHECK-ZBB-NEXT: or a0, a0, a1
+; CHECK-ZBB-NEXT: ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %notmask, 42
   %my = and i32 %mask, %y
@@ -1397,35 +889,20 @@
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @in_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_42_vary_invmask:
-; RV32I: # %bb.0:
-; RV32I-NEXT: not a0, a2
-; RV32I-NEXT: xori a2, a1, 42
-; RV32I-NEXT: and a0, a2, a0
-; RV32I-NEXT: xor a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: in_constant_42_vary_invmask:
-; RV64I: # %bb.0:
-; RV64I-NEXT: not a0, a2
-; RV64I-NEXT: xori a2, a1, 42
-; RV64I-NEXT: and a0, a2, a0
-; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBB-LABEL: in_constant_42_vary_invmask:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: andn a0, a2, a1
-; RV32ZBB-NEXT: ori a1, a2, 42
-; RV32ZBB-NEXT: andn a0, a1, a0
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: in_constant_42_vary_invmask:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: andn a0, a2, a1
-; RV64ZBB-NEXT: ori a1, a2, 42
-; RV64ZBB-NEXT: andn a0, a1, a0
-; RV64ZBB-NEXT: ret
+; CHECK-I-LABEL: in_constant_42_vary_invmask:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: not a0, a2
+; CHECK-I-NEXT: xori a2, a1, 42
+; CHECK-I-NEXT: and a0, a2, a0
+; CHECK-I-NEXT: xor a0, a0, a1
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: in_constant_42_vary_invmask:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: andn a0, a2, a1
+; CHECK-ZBB-NEXT: ori a1, a2, 42
+; CHECK-ZBB-NEXT: andn a0, a1, a0
+; CHECK-ZBB-NEXT: ret
   %notmask = xor i32 %mask, -1
   %n0 = xor i32 42, %y ; %x
   %n1 = and i32 %n0, %notmask
@@ -1525,35 +1002,20 @@
 
 ; Various bad variants
 define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) {
-; RV32I-LABEL: n0_badmask:
-; RV32I: # %bb.0:
-; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: not a2, a3
-; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: n0_badmask:
-; RV64I: # %bb.0:
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: not a2, a3
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: ret
-;
-; RV32ZBB-LABEL: n0_badmask:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: and a0, a0, a2
-; RV32ZBB-NEXT: andn a1, a1, a3
-; RV32ZBB-NEXT: or a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: n0_badmask:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: and a0, a0, a2
-; RV64ZBB-NEXT: andn a1, a1, a3
-; RV64ZBB-NEXT: or a0, a0, a1
-; RV64ZBB-NEXT: ret
+; CHECK-I-LABEL: n0_badmask:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: not a2, a3
+; CHECK-I-NEXT: and a1, a1, a2
+; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: n0_badmask:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a0, a0, a2
+; CHECK-ZBB-NEXT: andn a1, a1, a3
+; CHECK-ZBB-NEXT: or a0, a0, a1
+; CHECK-ZBB-NEXT: ret
   %mx = and i32 %x, %mask
   %notmask = xor i32 %mask2, -1 ; %mask2 instead of %mask
   %my = and i32 %y, %notmask