diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -1,49 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64 define void @bswap_v8i16(<8 x i16>* %x, <8 x i16>* %y) { -; LMULMAX2-RV32-LABEL: bswap_v8i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bswap_v8i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bswap_v8i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: bswap_v8i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: bswap_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a) @@ -53,81 +23,43 @@ declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) define void @bswap_v4i32(<4 x i32>* %x, <4 x i32>* %y) { -; LMULMAX2-RV32-LABEL: bswap_v4i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 16 -; LMULMAX2-RV32-NEXT: addi a1, a1, -256 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX2-RV32-NEXT: vsll.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret +; RV32-LABEL: bswap_v4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsrl.vi v9, v8, 8 +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v10, v8, 8 +; RV32-NEXT: lui a1, 4080 +; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX2-RV64-LABEL: bswap_v4i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 16 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -256 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX2-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bswap_v4i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: lui a1, 16 -; LMULMAX1-RV32-NEXT: addi a1, a1, -256 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: lui a1, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: bswap_v4i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: lui a1, 16 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -256 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: lui a1, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: bswap_v4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vsrl.vi v9, v8, 8 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: addiw a1, a1, -256 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: vor.vv v9, v9, v10 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: lui a1, 4080 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a) @@ -137,189 +69,97 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) { -; LMULMAX2-RV32-LABEL: bswap_v2i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: li a2, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX2-RV32-NEXT: lui a3, 16 -; LMULMAX2-RV32-NEXT: addi a3, a3, -256 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX2-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX2-RV32-NEXT: li a5, 5 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX2-RV32-NEXT: lui a5, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v11, v11, a5, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX2-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV32-NEXT: li a5, 255 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a5 -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v10, v11, v10 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v11, a3 -; LMULMAX2-RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX2-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v11, v8, a2 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a4 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vand.vv v11, v11, v12 -; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bswap_v2i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: li a2, 40 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX2-RV64-NEXT: lui a3, 16 -; LMULMAX2-RV64-NEXT: addiw a3, a3, -256 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX2-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV64-NEXT: lui a3, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX2-RV64-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX2-RV64-NEXT: li a3, 255 -; LMULMAX2-RV64-NEXT: slli a4, a3, 24 -; LMULMAX2-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX2-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: slli a4, a3, 32 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX2-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX2-RV64-NEXT: slli a4, a3, 40 -; LMULMAX2-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX2-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV64-NEXT: vsll.vx v11, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: slli a1, a3, 48 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v11, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bswap_v2i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a1, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: li a2, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: lui a3, 16 -; LMULMAX1-RV32-NEXT: addi a3, a3, -256 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV32-NEXT: lui a4, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: li a5, 5 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV32-NEXT: lui a5, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v11, v11, a5, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: li a5, 255 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a5 -; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a3 -; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v12, a4 -; LMULMAX1-RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: bswap_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: li a5, 5 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; RV32-NEXT: vmv.s.x v0, a5 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.i v11, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vor.vv v10, v11, v10 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: li a5, 255 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v10, a5 +; RV32-NEXT: vmerge.vim v10, v10, 0, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vsll.vi v11, v8, 8 +; RV32-NEXT: vand.vv v10, v11, v10 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v11, a3 +; RV32-NEXT: vmerge.vim v11, v11, 0, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vsll.vi v12, v8, 24 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vor.vv v10, v11, v10 +; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v12, a4 +; RV32-NEXT: vmerge.vim v12, v12, 0, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vand.vv v11, v11, v12 +; RV32-NEXT: vsll.vx v8, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v11 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: bswap_v2i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: li a2, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: lui a3, 16 -; LMULMAX1-RV64-NEXT: addiw a3, a3, -256 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV64-NEXT: lui a3, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: li a3, 255 -; LMULMAX1-RV64-NEXT: slli a4, a3, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: slli a4, a3, 32 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: slli a4, a3, 40 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsll.vx v11, v8, a1 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: slli a1, a3, 48 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: bswap_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsrl.vx v9, v8, a1 +; RV64-NEXT: li a2, 40 +; RV64-NEXT: vsrl.vx v10, v8, a2 +; RV64-NEXT: lui a3, 16 +; RV64-NEXT: addiw a3, a3, -256 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsrl.vi v11, v8, 8 +; RV64-NEXT: li a3, 255 +; RV64-NEXT: slli a4, a3, 24 +; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: vor.vv v10, v11, v10 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: vand.vx v10, v10, a4 +; RV64-NEXT: vsll.vi v11, v8, 24 +; RV64-NEXT: slli a4, a3, 40 +; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: vor.vv v10, v11, v10 +; RV64-NEXT: vsll.vx v11, v8, a1 +; RV64-NEXT: vsll.vx v8, v8, a2 +; RV64-NEXT: slli a1, a3, 48 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v11, v8 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -1,155 +1,59 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV64 define void @ctlz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind { -; LMULMAX2-RV32-LABEL: ctlz_v16i8: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: li a1, 85 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: li a1, 51 -; LMULMAX2-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctlz_v16i8: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: li a1, 85 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: li a1, 51 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctlz_v16i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: li a1, 85 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: li a1, 51 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctlz_v16i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: li a1, 85 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: li a1, 51 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ctlz_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: li a1, 85 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: li a1, 51 +; CHECK-NEXT: vand.vx v9, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret ; -; LMULMAX8-RV32-LABEL: ctlz_v16i8: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX8-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; LMULMAX8-RV32-NEXT: vzext.vf4 v12, v8 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v12, v12 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wx v9, v10, zero -; LMULMAX8-RV32-NEXT: li a1, 134 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: vrsub.vx v8, v9, a1 -; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: ctlz_v16i8: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX8-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; LMULMAX8-RV64-NEXT: vzext.vf4 v12, v8 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v12, v12 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wx v9, v10, zero -; LMULMAX8-RV64-NEXT: li a1, 134 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: vrsub.vx v8, v9, a1 -; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: ctlz_v16i8: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; LMULMAX8-NEXT: vle8.v v8, (a0) +; LMULMAX8-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; LMULMAX8-NEXT: vzext.vf4 v12, v8 +; LMULMAX8-NEXT: vfcvt.f.xu.v v12, v12 +; LMULMAX8-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23 +; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; LMULMAX8-NEXT: vnsrl.wx v9, v10, zero +; LMULMAX8-NEXT: li a1, 134 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: vrsub.vx v8, v9, a1 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 +; LMULMAX8-NEXT: vse8.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) @@ -327,33 +231,19 @@ ; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: ctlz_v8i16: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX8-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-RV32-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX8-RV32-NEXT: li a1, 142 -; LMULMAX8-RV32-NEXT: vrsub.vx v9, v9, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 16 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: ctlz_v8i16: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX8-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-RV64-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX8-RV64-NEXT: li a1, 142 -; LMULMAX8-RV64-NEXT: vrsub.vx v9, v9, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 16 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: ctlz_v8i16: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; LMULMAX8-NEXT: vle16.v v8, (a0) +; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 +; LMULMAX8-NEXT: vnsrl.wi v9, v10, 23 +; LMULMAX8-NEXT: li a1, 142 +; LMULMAX8-NEXT: vrsub.vx v9, v9, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 16 +; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX8-NEXT: vse16.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) @@ -545,35 +435,20 @@ ; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: ctlz_v4i32: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX8-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vnsrl.wx v9, v10, a1 -; LMULMAX8-RV32-NEXT: li a1, 1054 -; LMULMAX8-RV32-NEXT: vrsub.vx v9, v9, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 32 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: ctlz_v4i32: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX8-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vnsrl.wx v9, v10, a1 -; LMULMAX8-RV64-NEXT: li a1, 1054 -; LMULMAX8-RV64-NEXT: vrsub.vx v9, v9, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 32 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: ctlz_v4i32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX8-NEXT: vle32.v v8, (a0) +; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vnsrl.wx v9, v10, a1 +; LMULMAX8-NEXT: li a1, 1054 +; LMULMAX8-NEXT: vrsub.vx v9, v9, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 32 +; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX8-NEXT: vse32.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) @@ -888,189 +763,97 @@ declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) define void @ctlz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind { -; LMULMAX2-RV32-LABEL: ctlz_v32i8: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: li a1, 32 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: li a1, 85 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: li a1, 51 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret +; LMULMAX2-LABEL: ctlz_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: li a1, 32 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; LMULMAX2-NEXT: vle8.v v8, (a0) +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vxor.vi v8, v8, -1 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: li a1, 85 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: li a1, 51 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: vand.vi v8, v8, 15 +; LMULMAX2-NEXT: vse8.v v8, (a0) +; LMULMAX2-NEXT: ret ; -; LMULMAX2-RV64-LABEL: ctlz_v32i8: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: li a1, 32 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: li a1, 85 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: li a1, 51 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX1-LABEL: ctlz_v32i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle8.v v8, (a1) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vxor.vi v8, v8, -1 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: li a2, 85 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: li a3, 51 +; LMULMAX1-NEXT: vand.vx v10, v8, a3 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: vand.vi v8, v8, 15 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 2 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vxor.vi v9, v9, -1 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a3 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vi v9, v9, 15 +; LMULMAX1-NEXT: vse8.v v9, (a0) +; LMULMAX1-NEXT: vse8.v v8, (a1) +; LMULMAX1-NEXT: ret ; -; LMULMAX1-RV32-LABEL: ctlz_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle8.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: li a2, 85 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: li a3, 51 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vxor.vi v9, v9, -1 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-RV32-NEXT: vse8.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctlz_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle8.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: li a2, 85 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: li a3, 51 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-RV64-NEXT: vse8.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: ctlz_v32i8: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: li a1, 32 -; LMULMAX8-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX8-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; LMULMAX8-RV32-NEXT: vzext.vf4 v16, v8 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v16, v16 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wi v12, v16, 23 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wx v10, v12, zero -; LMULMAX8-RV32-NEXT: li a1, 134 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: vrsub.vx v8, v10, a1 -; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: ctlz_v32i8: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: li a1, 32 -; LMULMAX8-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX8-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; LMULMAX8-RV64-NEXT: vzext.vf4 v16, v8 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v16, v16 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wi v12, v16, 23 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wx v10, v12, zero -; LMULMAX8-RV64-NEXT: li a1, 134 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: vrsub.vx v8, v10, a1 -; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: ctlz_v32i8: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: li a1, 32 +; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; LMULMAX8-NEXT: vle8.v v8, (a0) +; LMULMAX8-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; LMULMAX8-NEXT: vzext.vf4 v16, v8 +; LMULMAX8-NEXT: vfcvt.f.xu.v v16, v16 +; LMULMAX8-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; LMULMAX8-NEXT: vnsrl.wi v12, v16, 23 +; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; LMULMAX8-NEXT: vnsrl.wx v10, v12, zero +; LMULMAX8-NEXT: li a1, 134 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: vrsub.vx v8, v10, a1 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 +; LMULMAX8-NEXT: vse8.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) @@ -1268,33 +1051,19 @@ ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; -; LMULMAX8-RV32-LABEL: ctlz_v16i16: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; LMULMAX8-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-RV32-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV32-NEXT: li a1, 142 -; LMULMAX8-RV32-NEXT: vrsub.vx v10, v10, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 16 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: ctlz_v16i16: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; LMULMAX8-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-RV64-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV64-NEXT: li a1, 142 -; LMULMAX8-RV64-NEXT: vrsub.vx v10, v10, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 16 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: ctlz_v16i16: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; LMULMAX8-NEXT: vle16.v v8, (a0) +; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 +; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23 +; LMULMAX8-NEXT: li a1, 142 +; LMULMAX8-NEXT: vrsub.vx v10, v10, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 16 +; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX8-NEXT: vse16.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) @@ -1508,35 +1277,20 @@ ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; -; LMULMAX8-RV32-LABEL: ctlz_v8i32: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX8-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vnsrl.wx v10, v12, a1 -; LMULMAX8-RV32-NEXT: li a1, 1054 -; LMULMAX8-RV32-NEXT: vrsub.vx v10, v10, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 32 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: ctlz_v8i32: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX8-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vnsrl.wx v10, v12, a1 -; LMULMAX8-RV64-NEXT: li a1, 1054 -; LMULMAX8-RV64-NEXT: vrsub.vx v10, v10, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 32 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: ctlz_v8i32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; LMULMAX8-NEXT: vle32.v v8, (a0) +; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vnsrl.wx v10, v12, a1 +; LMULMAX8-NEXT: li a1, 1054 +; LMULMAX8-NEXT: vrsub.vx v10, v10, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 32 +; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX8-NEXT: vse32.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -1,147 +1,58 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV64 define void @cttz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind { -; LMULMAX2-RV32-LABEL: cttz_v16i8: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 1 -; LMULMAX2-RV32-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: li a1, 85 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: li a1, 51 -; LMULMAX2-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: cttz_v16i8: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 1 -; LMULMAX2-RV64-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: li a1, 85 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: li a1, 51 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: cttz_v16i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a1, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: li a1, 85 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: li a1, 51 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: cttz_v16i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: li a1, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: li a1, 85 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: li a1, 51 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: cttz_v16i8: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX8-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV32-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV32-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; LMULMAX8-RV32-NEXT: vzext.vf4 v12, v9 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v12, v12 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wx v9, v10, zero -; LMULMAX8-RV32-NEXT: li a1, 127 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v9, a1 -; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret +; CHECK-LABEL: cttz_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: li a1, 85 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: li a1, 51 +; CHECK-NEXT: vand.vx v9, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret ; -; LMULMAX8-RV64-LABEL: cttz_v16i8: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX8-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; LMULMAX8-RV64-NEXT: vzext.vf4 v12, v9 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v12, v12 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wx v9, v10, zero -; LMULMAX8-RV64-NEXT: li a1, 127 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: vsub.vx v8, v9, a1 -; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v16i8: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; LMULMAX8-NEXT: vle8.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 +; LMULMAX8-NEXT: vand.vv v9, v8, v9 +; LMULMAX8-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; LMULMAX8-NEXT: vzext.vf4 v12, v9 +; LMULMAX8-NEXT: vfcvt.f.xu.v v12, v12 +; LMULMAX8-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23 +; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; LMULMAX8-NEXT: vnsrl.wx v9, v10, zero +; LMULMAX8-NEXT: li a1, 127 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: vsub.vx v8, v9, a1 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 +; LMULMAX8-NEXT: vse8.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) @@ -303,37 +214,21 @@ ; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_v8i16: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX8-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV32-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV32-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX8-RV32-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX8-RV32-NEXT: li a1, 127 -; LMULMAX8-RV32-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 16 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_v8i16: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX8-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX8-RV64-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX8-RV64-NEXT: li a1, 127 -; LMULMAX8-RV64-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 16 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v8i16: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; LMULMAX8-NEXT: vle16.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 +; LMULMAX8-NEXT: vand.vv v9, v8, v9 +; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v9 +; LMULMAX8-NEXT: vnsrl.wi v9, v10, 23 +; LMULMAX8-NEXT: li a1, 127 +; LMULMAX8-NEXT: vsub.vx v9, v9, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 16 +; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX8-NEXT: vse16.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) @@ -501,39 +396,22 @@ ; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_v4i32: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX8-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV32-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV32-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vnsrl.wx v9, v10, a1 -; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 32 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_v4i32: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX8-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vnsrl.wx v9, v10, a1 -; LMULMAX8-RV64-NEXT: li a1, 1023 -; LMULMAX8-RV64-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 32 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v4i32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX8-NEXT: vle32.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 +; LMULMAX8-NEXT: vand.vv v9, v8, v9 +; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v9 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vnsrl.wx v9, v10, a1 +; LMULMAX8-NEXT: li a1, 1023 +; LMULMAX8-NEXT: vsub.vx v9, v9, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 32 +; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX8-NEXT: vse32.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) @@ -788,173 +666,89 @@ declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) define void @cttz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind { -; LMULMAX2-RV32-LABEL: cttz_v32i8: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: li a1, 32 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 1 -; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: li a1, 85 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: li a1, 51 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret +; LMULMAX2-LABEL: cttz_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: li a1, 32 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; LMULMAX2-NEXT: vle8.v v8, (a0) +; LMULMAX2-NEXT: li a1, 1 +; LMULMAX2-NEXT: vsub.vx v10, v8, a1 +; LMULMAX2-NEXT: vxor.vi v8, v8, -1 +; LMULMAX2-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: li a1, 85 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: li a1, 51 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: vand.vi v8, v8, 15 +; LMULMAX2-NEXT: vse8.v v8, (a0) +; LMULMAX2-NEXT: ret ; -; LMULMAX2-RV64-LABEL: cttz_v32i8: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: li a1, 32 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 1 -; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: li a1, 85 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: li a1, 51 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX1-LABEL: cttz_v32i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle8.v v8, (a1) +; LMULMAX1-NEXT: vle8.v v9, (a0) +; LMULMAX1-NEXT: li a2, 1 +; LMULMAX1-NEXT: vsub.vx v10, v8, a2 +; LMULMAX1-NEXT: vxor.vi v8, v8, -1 +; LMULMAX1-NEXT: vand.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: li a3, 85 +; LMULMAX1-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: li a4, 51 +; LMULMAX1-NEXT: vand.vx v10, v8, a4 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: vand.vi v8, v8, 15 +; LMULMAX1-NEXT: vsub.vx v10, v9, a2 +; LMULMAX1-NEXT: vxor.vi v9, v9, -1 +; LMULMAX1-NEXT: vand.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a4 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vi v9, v9, 15 +; LMULMAX1-NEXT: vse8.v v9, (a0) +; LMULMAX1-NEXT: vse8.v v8, (a1) +; LMULMAX1-NEXT: ret ; -; LMULMAX1-RV32-LABEL: cttz_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle8.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a2, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: li a3, 85 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: li a4, 51 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-RV32-NEXT: vxor.vi v9, v9, -1 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-RV32-NEXT: vse8.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: cttz_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle8.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a2, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: li a3, 85 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: li a4, 51 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 -; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-RV64-NEXT: vse8.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: cttz_v32i8: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: li a1, 32 -; LMULMAX8-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX8-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV32-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV32-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; LMULMAX8-RV32-NEXT: vzext.vf4 v16, v10 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v16, v16 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wi v12, v16, 23 -; LMULMAX8-RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; LMULMAX8-RV32-NEXT: vnsrl.wx v10, v12, zero -; LMULMAX8-RV32-NEXT: li a1, 127 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v10, a1 -; LMULMAX8-RV32-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_v32i8: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: li a1, 32 -; LMULMAX8-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; LMULMAX8-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; LMULMAX8-RV64-NEXT: vzext.vf4 v16, v10 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v16, v16 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wi v12, v16, 23 -; LMULMAX8-RV64-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; LMULMAX8-RV64-NEXT: vnsrl.wx v10, v12, zero -; LMULMAX8-RV64-NEXT: li a1, 127 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: vsub.vx v8, v10, a1 -; LMULMAX8-RV64-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v32i8: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: li a1, 32 +; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; LMULMAX8-NEXT: vle8.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 +; LMULMAX8-NEXT: vand.vv v10, v8, v10 +; LMULMAX8-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; LMULMAX8-NEXT: vzext.vf4 v16, v10 +; LMULMAX8-NEXT: vfcvt.f.xu.v v16, v16 +; LMULMAX8-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; LMULMAX8-NEXT: vnsrl.wi v12, v16, 23 +; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; LMULMAX8-NEXT: vnsrl.wx v10, v12, zero +; LMULMAX8-NEXT: li a1, 127 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: vsub.vx v8, v10, a1 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 +; LMULMAX8-NEXT: vse8.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) @@ -1120,37 +914,21 @@ ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_v16i16: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; LMULMAX8-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV32-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV32-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-RV32-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV32-NEXT: li a1, 127 -; LMULMAX8-RV32-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 16 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_v16i16: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; LMULMAX8-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-RV64-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-RV64-NEXT: li a1, 127 -; LMULMAX8-RV64-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 16 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v16i16: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; LMULMAX8-NEXT: vle16.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 +; LMULMAX8-NEXT: vand.vv v10, v8, v10 +; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 +; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23 +; LMULMAX8-NEXT: li a1, 127 +; LMULMAX8-NEXT: vsub.vx v10, v10, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 16 +; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX8-NEXT: vse16.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) @@ -1320,39 +1098,22 @@ ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_v8i32: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX8-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV32-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV32-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-RV32-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vnsrl.wx v10, v12, a1 -; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-RV32-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV32-NEXT: li a1, 32 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_v8i32: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX8-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-RV64-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vnsrl.wx v10, v12, a1 -; LMULMAX8-RV64-NEXT: li a1, 1023 -; LMULMAX8-RV64-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 32 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v8i32: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; LMULMAX8-NEXT: vle32.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 +; LMULMAX8-NEXT: vand.vv v10, v8, v10 +; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vnsrl.wx v10, v12, a1 +; LMULMAX8-NEXT: li a1, 1023 +; LMULMAX8-NEXT: vsub.vx v10, v10, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 32 +; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX8-NEXT: vse32.v v8, (a0) +; LMULMAX8-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)