diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -1,133 +1,71 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX1-RV64 define void @bitreverse_v8i16(<8 x i16>* %x, <8 x i16>* %y) { -; LMULMAX2-RV32-LABEL: bitreverse_v8i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bitreverse_v8i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bitreverse_v8i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: lui a1, 1 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: lui a1, 3 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 5 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: bitreverse_v8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vsrl.vi v9, v8, 8 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: lui a1, 1 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 2 +; RV32-NEXT: lui a1, 3 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: lui a1, 5 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vse16.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: bitreverse_v8i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: lui a1, 1 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: lui a1, 3 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 5 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: bitreverse_v8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vsrl.vi v9, v8, 8 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: lui a1, 3 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a1, 5 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vse16.v v8, (a0) +; RV64-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) @@ -137,165 +75,85 @@ declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) define void @bitreverse_v4i32(<4 x i32>* %x, <4 x i32>* %y) { -; LMULMAX2-RV32-LABEL: bitreverse_v4i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 16 -; LMULMAX2-RV32-NEXT: addi a1, a1, -256 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX2-RV32-NEXT: vsll.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bitreverse_v4i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 16 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -256 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX2-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bitreverse_v4i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: lui a1, 16 -; LMULMAX1-RV32-NEXT: addi a1, a1, -256 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: lui a1, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: lui a1, 61681 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: lui a1, 209715 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 349525 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: bitreverse_v4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsrl.vi v9, v8, 8 +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v10, v8, 8 +; RV32-NEXT: lui a1, 4080 +; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vand.vx v9, v9, a1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: bitreverse_v4i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: lui a1, 16 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -256 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: lui a1, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: lui a1, 61681 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: lui a1, 209715 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 349525 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: bitreverse_v4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vsrl.vi v9, v8, 8 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: addiw a1, a1, -256 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: vor.vv v9, v9, v10 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: lui a1, 4080 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) @@ -305,291 +163,148 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) { -; LMULMAX2-RV32-LABEL: bitreverse_v2i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: li a2, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX2-RV32-NEXT: lui a3, 16 -; LMULMAX2-RV32-NEXT: addi a3, a3, -256 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX2-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX2-RV32-NEXT: li a5, 5 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX2-RV32-NEXT: lui a5, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v11, v11, a5, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX2-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV32-NEXT: li a5, 255 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a5 -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v10, v11, v10 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v11, a3 -; LMULMAX2-RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX2-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v11, v8, a2 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a4 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vand.vv v11, v11, v12 -; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bitreverse_v2i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: li a2, 40 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX2-RV64-NEXT: lui a3, 16 -; LMULMAX2-RV64-NEXT: addiw a3, a3, -256 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX2-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX2-RV64-NEXT: lui a3, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX2-RV64-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX2-RV64-NEXT: li a3, 255 -; LMULMAX2-RV64-NEXT: slli a4, a3, 24 -; LMULMAX2-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX2-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX2-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: slli a4, a3, 32 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX2-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX2-RV64-NEXT: slli a4, a3, 40 -; LMULMAX2-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX2-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX2-RV64-NEXT: vsll.vx v11, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: slli a1, a3, 48 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v11, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_0) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_1) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_2) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; RV32-LABEL: bitreverse_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: li a5, 5 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; RV32-NEXT: vmv.s.x v0, a5 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.i v11, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vor.vv v10, v11, v10 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: li a5, 255 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v10, a5 +; RV32-NEXT: vmerge.vim v10, v10, 0, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vsll.vi v11, v8, 8 +; RV32-NEXT: vand.vv v10, v11, v10 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v11, a3 +; RV32-NEXT: vmerge.vim v11, v11, 0, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vsll.vi v12, v8, 24 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vor.vv v10, v11, v10 +; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v12, a4 +; RV32-NEXT: vmerge.vim v12, v12, 0, v0 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vand.vv v11, v11, v12 +; RV32-NEXT: vsll.vx v8, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v11 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV32-LABEL: bitreverse_v2i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a1, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: li a2, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: lui a3, 16 -; LMULMAX1-RV32-NEXT: addi a3, a3, -256 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV32-NEXT: lui a4, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: li a5, 5 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV32-NEXT: lui a5, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v11, v11, a5, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: li a5, 255 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a5 -; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a3 -; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vv v11, v12, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v12, a4 -; LMULMAX1-RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: lui a1, 61681 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: lui a1, 209715 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 349525 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: bitreverse_v2i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: li a2, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: lui a3, 16 -; LMULMAX1-RV64-NEXT: addiw a3, a3, -256 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 24 -; LMULMAX1-RV64-NEXT: lui a3, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: li a3, 255 -; LMULMAX1-RV64-NEXT: slli a4, a3, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsll.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: slli a4, a3, 32 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: slli a4, a3, 40 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsll.vx v11, v8, a1 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: slli a1, a3, 48 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_0) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_1) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_2) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: bitreverse_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsrl.vx v9, v8, a1 +; RV64-NEXT: li a2, 40 +; RV64-NEXT: vsrl.vx v10, v8, a2 +; RV64-NEXT: lui a3, 16 +; RV64-NEXT: addiw a3, a3, -256 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsrl.vi v11, v8, 8 +; RV64-NEXT: li a3, 255 +; RV64-NEXT: slli a4, a3, 24 +; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: vor.vv v10, v11, v10 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: vand.vx v10, v10, a4 +; RV64-NEXT: vsll.vi v11, v8, 24 +; RV64-NEXT: slli a4, a3, 40 +; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: vor.vv v10, v11, v10 +; RV64-NEXT: vsll.vx v11, v8, a1 +; RV64-NEXT: vsll.vx v8, v8, a2 +; RV64-NEXT: slli a1, a3, 48 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v11, v8 +; RV64-NEXT: lui a1, %hi(.LCPI2_0) +; RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: lui a1, %hi(.LCPI2_1) +; RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: lui a1, %hi(.LCPI2_2) +; RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -568,8 +568,8 @@ } ; FIXME: Use vse64.v on RV32 to avoid two scalar extracts and two scalar stores. -define void @store_extractelt_v4i64(<2 x i64>* %x, i64* %p) nounwind { -; RV32-LABEL: store_extractelt_v4i64: +define void @store_extractelt_v2i64(<2 x i64>* %x, i64* %p) nounwind { +; RV32-LABEL: store_extractelt_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vle64.v v8, (a0) @@ -583,7 +583,7 @@ ; RV32-NEXT: sw a0, 4(a1) ; RV32-NEXT: ret ; -; RV64-LABEL: store_extractelt_v4i64: +; RV64-LABEL: store_extractelt_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v8, (a0) @@ -597,8 +597,8 @@ ret void } -define void @store_extractelt_v4f64(<2 x double>* %x, double* %p) nounwind { -; CHECK-LABEL: store_extractelt_v4f64: +define void @store_extractelt_v2f64(<2 x double>* %x, double* %p) nounwind { +; CHECK-LABEL: store_extractelt_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -103,7 +103,7 @@ ; FIXME: This could generate vrsub.vi but the (ISD::MUL X, -1) we generate ; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before ; being combined. -define void @buildvec_vid_stepn1_add0_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x +define void @buildvec_vid_stepn1_add0_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3) { ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu @@ -114,7 +114,6 @@ ; CHECK-NEXT: vse8.v v8, (a2) ; CHECK-NEXT: vse8.v v8, (a3) ; CHECK-NEXT: ret -i8>* %z2, <4 x i8>* %z3) { store <4 x i8> , <4 x i8>* %z0 store <4 x i8> , <4 x i8>* %z1 store <4 x i8> , <4 x i8>* %z2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) { ; CHECK-LABEL: vnsra_v8i16_v8i8_scalar: @@ -34,26 +34,6 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vnsra.wx v8, v8, a0 ; CHECK-NEXT: ret -; RV32-LABEL: vnsra_v2i64_v2i32_scalar: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli zero, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsra.vv v25, v8, v25 -; RV32-NEXT: vsetivli zero, 2, e32,mf2,ta,mu -; RV32-NEXT: vnsrl.wi v8, v25, 0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; RV64-LABEL: vnsra_v2i64_v2i32_scalar: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32,mf2,ta,mu -; RV64-NEXT: vnsra.wx v25, v8, a0 -; RV64-NEXT: vmv1r.v v8, v25 -; RV64-NEXT: ret %insert = insertelement <2 x i64> poison, i64 %y, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer %a = ashr <2 x i64> %x, %splat @@ -126,26 +106,6 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vnsrl.wx v8, v8, a0 ; CHECK-NEXT: ret -; RV32-LABEL: vnsrl_v2i64_v2i32_scalar: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli zero, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsrl.vv v25, v8, v25 -; RV32-NEXT: vsetivli zero, 2, e32,mf2,ta,mu -; RV32-NEXT: vnsrl.wi v8, v25, 0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; RV64-LABEL: vnsrl_v2i64_v2i32_scalar: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32,mf2,ta,mu -; RV64-NEXT: vnsrl.wx v25, v8, a0 -; RV64-NEXT: vmv1r.v v8, v25 -; RV64-NEXT: ret %insert = insertelement <2 x i64> poison, i64 %y, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer %a = lshr <2 x i64> %x, %splat