Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
@@ -0,0 +1,3553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+experimental-zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+experimental-zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+experimental-zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+experimental-zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-512
+
+;
+; VECTOR_REVERSE - masks
+;
+
+define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
+; CHECK-LABEL: reverse_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vmv.x.s a1, v8
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, mu
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %res = call <2 x i1> @llvm.experimental.vector.reverse.v2i1(<2 x i1> %a)
+ ret <2 x i1> %res
+}
+
+define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
+; CHECK-LABEL: reverse_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: addi a0, sp, 15
+; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: addi a0, sp, 14
+; CHECK-NEXT: vse8.v v9, (a0)
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: addi a0, sp, 13
+; CHECK-NEXT: vse8.v v9, (a0)
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: addi a0, sp, 12
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <4 x i1> @llvm.experimental.vector.reverse.v4i1(<4 x i1> %a)
+ ret <4 x i1> %res
+}
+
+define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
+; RV32-BITS-UNKNOWN-LABEL: reverse_v8i1:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16
+; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
+; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
+; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp)
+; RV32-BITS-UNKNOWN-NEXT: 
slli a1, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a0, 8(sp) +; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 8 +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v8i1: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: addi sp, sp, -16 +; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, mu +; RV32-BITS-256-NEXT: vmv.x.s a0, v0 +; RV32-BITS-256-NEXT: andi a1, a0, 1 +; RV32-BITS-256-NEXT: sb a1, 15(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 30 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 14(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 29 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 13(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 12(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 27 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 11(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 10(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 9(sp) +; RV32-BITS-256-NEXT: slli a0, a0, 24 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: sb a0, 8(sp) +; RV32-BITS-256-NEXT: addi a0, sp, 8 +; RV32-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-BITS-256-NEXT: vle8.v v8, (a0) +; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: addi sp, sp, 16 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v8i1: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: addi sp, sp, -16 +; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, mu +; RV32-BITS-512-NEXT: vmv.x.s a0, v0 +; RV32-BITS-512-NEXT: andi a1, a0, 1 +; RV32-BITS-512-NEXT: sb a1, 15(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 30 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 14(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 13(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 12(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 27 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 11(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 26 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 10(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: srli a1, a1, 
31 +; RV32-BITS-512-NEXT: sb a1, 9(sp) +; RV32-BITS-512-NEXT: slli a0, a0, 24 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: sb a0, 8(sp) +; RV32-BITS-512-NEXT: addi a0, sp, 8 +; RV32-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-BITS-512-NEXT: vle8.v v8, (a0) +; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: addi sp, sp, 16 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v8i1: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 56 +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a0, 8(sp) +; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 8 +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v8i1: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: addi sp, sp, -16 +; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, mu +; RV64-BITS-256-NEXT: vmv.x.s a0, v0 +; RV64-BITS-256-NEXT: andi a1, a0, 1 +; RV64-BITS-256-NEXT: sb a1, 15(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 62 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 14(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 13(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 12(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 59 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 11(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 10(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 9(sp) +; RV64-BITS-256-NEXT: slli a0, a0, 56 +; RV64-BITS-256-NEXT: srli a0, a0, 63 +; RV64-BITS-256-NEXT: sb a0, 8(sp) +; RV64-BITS-256-NEXT: addi a0, sp, 8 +; RV64-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-BITS-256-NEXT: vle8.v v8, (a0) +; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: addi sp, sp, 16 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v8i1: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: addi sp, sp, -16 +; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16 
+; RV64-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, mu +; RV64-BITS-512-NEXT: vmv.x.s a0, v0 +; RV64-BITS-512-NEXT: andi a1, a0, 1 +; RV64-BITS-512-NEXT: sb a1, 15(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 62 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 14(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 13(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 12(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 59 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 11(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 10(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 9(sp) +; RV64-BITS-512-NEXT: slli a0, a0, 56 +; RV64-BITS-512-NEXT: srli a0, a0, 63 +; RV64-BITS-512-NEXT: sb a0, 8(sp) +; RV64-BITS-512-NEXT: addi a0, sp, 8 +; RV64-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-BITS-512-NEXT: vle8.v v8, (a0) +; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: addi sp, sp, 16 +; RV64-BITS-512-NEXT: ret + %res = call <8 x i1> @llvm.experimental.vector.reverse.v8i1(<8 x i1> %a) + ret <8 x i1> %res +} + +define <16 x i1> @reverse_v16i1(<16 x i1> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v16i1: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 7(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 6(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 5(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 4(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 3(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 2(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp) +; 
RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a0, 0(sp) +; RV32-BITS-UNKNOWN-NEXT: mv a0, sp +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v16i1: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: addi sp, sp, -16 +; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, mu +; RV32-BITS-256-NEXT: vmv.x.s a0, v0 +; RV32-BITS-256-NEXT: andi a1, a0, 1 +; RV32-BITS-256-NEXT: sb a1, 15(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 30 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 14(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 29 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 13(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 12(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 27 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 11(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 10(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 9(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 24 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 8(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 23 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 7(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 22 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 6(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 21 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 5(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 20 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 4(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 19 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 3(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 18 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 2(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 17 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 1(sp) +; RV32-BITS-256-NEXT: slli a0, a0, 16 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: sb a0, 0(sp) +; RV32-BITS-256-NEXT: mv a0, sp +; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-BITS-256-NEXT: vle8.v v8, (a0) +; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: addi sp, sp, 16 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v16i1: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: addi sp, sp, -16 +; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, mu +; RV32-BITS-512-NEXT: vmv.x.s a0, v0 +; RV32-BITS-512-NEXT: andi a1, a0, 1 +; RV32-BITS-512-NEXT: sb a1, 15(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 30 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 14(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 13(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 12(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 27 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 11(sp) +; 
RV32-BITS-512-NEXT: slli a1, a0, 26 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 10(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 9(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 24 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 8(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 23 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 7(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 22 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 6(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 21 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 5(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 20 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 4(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 19 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 3(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 18 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 2(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 17 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 1(sp) +; RV32-BITS-512-NEXT: slli a0, a0, 16 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: sb a0, 0(sp) +; RV32-BITS-512-NEXT: mv a0, sp +; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-BITS-512-NEXT: vle8.v v8, (a0) +; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: addi sp, sp, 16 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v16i1: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 7(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 6(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 5(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 4(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 3(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 2(sp) +; 
RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 48 +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a0, 0(sp) +; RV64-BITS-UNKNOWN-NEXT: mv a0, sp +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v16i1: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: addi sp, sp, -16 +; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, mu +; RV64-BITS-256-NEXT: vmv.x.s a0, v0 +; RV64-BITS-256-NEXT: andi a1, a0, 1 +; RV64-BITS-256-NEXT: sb a1, 15(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 62 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 14(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 13(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 12(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 59 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 11(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 10(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 9(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 56 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 8(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 55 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 7(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 54 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 6(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 53 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 5(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 52 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 4(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 51 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 3(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 50 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 2(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 49 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 1(sp) +; RV64-BITS-256-NEXT: slli a0, a0, 48 +; RV64-BITS-256-NEXT: srli a0, a0, 63 +; RV64-BITS-256-NEXT: sb a0, 0(sp) +; RV64-BITS-256-NEXT: mv a0, sp +; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64-BITS-256-NEXT: vle8.v v8, (a0) +; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: addi sp, sp, 16 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v16i1: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: addi sp, sp, -16 +; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, mu +; RV64-BITS-512-NEXT: vmv.x.s a0, v0 +; RV64-BITS-512-NEXT: andi a1, a0, 1 +; RV64-BITS-512-NEXT: sb a1, 15(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 62 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 14(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 13(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 12(sp) +; 
RV64-BITS-512-NEXT: slli a1, a0, 59 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 11(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 10(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 9(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 56 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 8(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 55 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 7(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 54 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 6(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 53 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 5(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 52 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 4(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 51 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 3(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 50 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 2(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 49 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 1(sp) +; RV64-BITS-512-NEXT: slli a0, a0, 48 +; RV64-BITS-512-NEXT: srli a0, a0, 63 +; RV64-BITS-512-NEXT: sb a0, 0(sp) +; RV64-BITS-512-NEXT: mv a0, sp +; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64-BITS-512-NEXT: vle8.v v8, (a0) +; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: addi sp, sp, 16 +; RV64-BITS-512-NEXT: ret + %res = call <16 x i1> @llvm.experimental.vector.reverse.v16i1(<16 x i1> %a) + ret <16 x i1> %res +} + +define <32 x i1> @reverse_v32i1(<32 x i1> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v32i1: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -64 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 64 +; RV32-BITS-UNKNOWN-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-BITS-UNKNOWN-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4 +; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8 +; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 64 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 +; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -32 +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 31(sp) +; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 0(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 30(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 29(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 28(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 27(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 26(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 25(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 24(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23 +; RV32-BITS-UNKNOWN-NEXT: srli a1, 
a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 23(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 22(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 21(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 20(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 19(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 18(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 17(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 16(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 7(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 6(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 5(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 4(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 3(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 2(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a0, 1(sp) +; RV32-BITS-UNKNOWN-NEXT: li a0, 32 +; RV32-BITS-UNKNOWN-NEXT: mv a1, sp +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -64 +; RV32-BITS-UNKNOWN-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-BITS-UNKNOWN-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 64 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v32i1: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: addi sp, sp, -64 +; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 64 +; RV32-BITS-256-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; 
RV32-BITS-256-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-BITS-256-NEXT: .cfi_offset ra, -4 +; RV32-BITS-256-NEXT: .cfi_offset s0, -8 +; RV32-BITS-256-NEXT: addi s0, sp, 64 +; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0 +; RV32-BITS-256-NEXT: andi sp, sp, -32 +; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV32-BITS-256-NEXT: vmv.x.s a0, v0 +; RV32-BITS-256-NEXT: andi a1, a0, 1 +; RV32-BITS-256-NEXT: sb a1, 31(sp) +; RV32-BITS-256-NEXT: srli a1, a0, 31 +; RV32-BITS-256-NEXT: sb a1, 0(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 30 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 30(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 29 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 29(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 28(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 27 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 27(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 26(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 25(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 24 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 24(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 23 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 23(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 22 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 22(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 21 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 21(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 20 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 20(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 19 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 19(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 18 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 18(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 17 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 17(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 16 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 16(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 15 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 15(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 14 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 14(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 13 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 13(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 12 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 12(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 11 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 11(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 10 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 10(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 9 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 9(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 8 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 8(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 7 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 7(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 6 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 6(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 5 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 5(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 4 +; RV32-BITS-256-NEXT: srli 
a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 4(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 3(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 2(sp) +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: sb a0, 1(sp) +; RV32-BITS-256-NEXT: li a0, 32 +; RV32-BITS-256-NEXT: mv a1, sp +; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; RV32-BITS-256-NEXT: vle8.v v8, (a1) +; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: addi sp, s0, -64 +; RV32-BITS-256-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-BITS-256-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-BITS-256-NEXT: addi sp, sp, 64 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v32i1: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: addi sp, sp, -64 +; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 64 +; RV32-BITS-512-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-BITS-512-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-BITS-512-NEXT: .cfi_offset ra, -4 +; RV32-BITS-512-NEXT: .cfi_offset s0, -8 +; RV32-BITS-512-NEXT: addi s0, sp, 64 +; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0 +; RV32-BITS-512-NEXT: andi sp, sp, -32 +; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV32-BITS-512-NEXT: vmv.x.s a0, v0 +; RV32-BITS-512-NEXT: andi a1, a0, 1 +; RV32-BITS-512-NEXT: sb a1, 31(sp) +; RV32-BITS-512-NEXT: srli a1, a0, 31 +; RV32-BITS-512-NEXT: sb a1, 0(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 30 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 30(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 29(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 28(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 27 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 27(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 26 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 26(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 25(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 24 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 24(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 23 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 23(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 22 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 22(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 21 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 21(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 20 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 20(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 19 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 19(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 18 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 18(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 17 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 17(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 16 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 16(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 15 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 15(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 14 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 14(sp) +; 
RV32-BITS-512-NEXT: slli a1, a0, 13 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 13(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 12 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 12(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 11 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 11(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 10 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 10(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 9 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 9(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 8 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 8(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 7 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 7(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 6 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 6(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 5 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 5(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 4 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 4(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 3(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 2(sp) +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: sb a0, 1(sp) +; RV32-BITS-512-NEXT: li a0, 32 +; RV32-BITS-512-NEXT: mv a1, sp +; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; RV32-BITS-512-NEXT: vle8.v v8, (a1) +; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: addi sp, s0, -64 +; RV32-BITS-512-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-BITS-512-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-BITS-512-NEXT: addi sp, sp, 64 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v32i1: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -64 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 64 +; RV64-BITS-UNKNOWN-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-BITS-UNKNOWN-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8 +; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16 +; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 64 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 +; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -32 +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 31(sp) +; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 0(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 30(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 29(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 28(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 27(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 26(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 25(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 +; 
RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 24(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 23(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 22(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 21(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 20(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 19(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 18(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 17(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 16(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 40 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 7(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 6(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 5(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 4(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 3(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 2(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 33 +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a0, 1(sp) +; RV64-BITS-UNKNOWN-NEXT: li a0, 32 +; RV64-BITS-UNKNOWN-NEXT: mv a1, sp +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -64 +; RV64-BITS-UNKNOWN-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-BITS-UNKNOWN-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 64 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v32i1: +; 
RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: addi sp, sp, -64 +; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 64 +; RV64-BITS-256-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-BITS-256-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-BITS-256-NEXT: .cfi_offset ra, -8 +; RV64-BITS-256-NEXT: .cfi_offset s0, -16 +; RV64-BITS-256-NEXT: addi s0, sp, 64 +; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0 +; RV64-BITS-256-NEXT: andi sp, sp, -32 +; RV64-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV64-BITS-256-NEXT: vmv.x.s a0, v0 +; RV64-BITS-256-NEXT: andi a1, a0, 1 +; RV64-BITS-256-NEXT: sb a1, 31(sp) +; RV64-BITS-256-NEXT: srliw a1, a0, 31 +; RV64-BITS-256-NEXT: sb a1, 0(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 62 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 30(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 29(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 28(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 59 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 27(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 26(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 25(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 56 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 24(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 55 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 23(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 54 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 22(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 53 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 21(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 52 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 20(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 51 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 19(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 50 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 18(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 49 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 17(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 48 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 16(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 47 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 15(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 46 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 14(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 45 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 13(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 44 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 12(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 43 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 11(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 42 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 10(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 41 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 9(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 40 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 8(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 39 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 7(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 38 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 6(sp) +; 
RV64-BITS-256-NEXT: slli a1, a0, 37 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 5(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 36 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 4(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 35 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 3(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 34 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 2(sp) +; RV64-BITS-256-NEXT: slli a0, a0, 33 +; RV64-BITS-256-NEXT: srli a0, a0, 63 +; RV64-BITS-256-NEXT: sb a0, 1(sp) +; RV64-BITS-256-NEXT: li a0, 32 +; RV64-BITS-256-NEXT: mv a1, sp +; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; RV64-BITS-256-NEXT: vle8.v v8, (a1) +; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: addi sp, s0, -64 +; RV64-BITS-256-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-BITS-256-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-BITS-256-NEXT: addi sp, sp, 64 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v32i1: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: addi sp, sp, -64 +; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 64 +; RV64-BITS-512-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-BITS-512-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-BITS-512-NEXT: .cfi_offset ra, -8 +; RV64-BITS-512-NEXT: .cfi_offset s0, -16 +; RV64-BITS-512-NEXT: addi s0, sp, 64 +; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0 +; RV64-BITS-512-NEXT: andi sp, sp, -32 +; RV64-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV64-BITS-512-NEXT: vmv.x.s a0, v0 +; RV64-BITS-512-NEXT: andi a1, a0, 1 +; RV64-BITS-512-NEXT: sb a1, 31(sp) +; RV64-BITS-512-NEXT: srliw a1, a0, 31 +; RV64-BITS-512-NEXT: sb a1, 0(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 62 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 30(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 29(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 28(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 59 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 27(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 26(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 25(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 56 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 24(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 55 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 23(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 54 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 22(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 53 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 21(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 52 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 20(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 51 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 19(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 50 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 18(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 49 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 17(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 48 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 16(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 47 +; RV64-BITS-512-NEXT: srli 
a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 15(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 46 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 14(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 45 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 13(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 44 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 12(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 43 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 11(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 42 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 10(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 41 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 9(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 40 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 8(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 39 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 7(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 38 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 6(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 37 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 5(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 36 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 4(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 35 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 3(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 34 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 2(sp) +; RV64-BITS-512-NEXT: slli a0, a0, 33 +; RV64-BITS-512-NEXT: srli a0, a0, 63 +; RV64-BITS-512-NEXT: sb a0, 1(sp) +; RV64-BITS-512-NEXT: li a0, 32 +; RV64-BITS-512-NEXT: mv a1, sp +; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; RV64-BITS-512-NEXT: vle8.v v8, (a1) +; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: addi sp, s0, -64 +; RV64-BITS-512-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-BITS-512-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-BITS-512-NEXT: addi sp, sp, 64 +; RV64-BITS-512-NEXT: ret + %res = call <32 x i1> @llvm.experimental.vector.reverse.v32i1(<32 x i1> %a) + ret <32 x i1> %res +} + +define <64 x i1> @reverse_v64i1(<64 x i1> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v64i1: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -128 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 128 +; RV32-BITS-UNKNOWN-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-BITS-UNKNOWN-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4 +; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8 +; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 128 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 +; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64 +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 63(sp) +; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 32(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 62(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 61(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 60(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; 
RV32-BITS-UNKNOWN-NEXT: sb a1, 59(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 58(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 57(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 56(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 55(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 54(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 53(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 52(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 51(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 50(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 49(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 48(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 47(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 46(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 45(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 44(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 43(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 42(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 41(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 40(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 39(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 38(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 37(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 36(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 35(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 34(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a0, 33(sp) +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vslidedown.vi v8, v0, 1 +; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v8 +; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: 
sb a1, 31(sp) +; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 0(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 30(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 29(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 28(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 27(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 26(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 25(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 24(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 23(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 22(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 21(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 20(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 19(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 18(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 17(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 16(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 15(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 7(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 6(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 5(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 4(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 
3 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 3(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a1, 2(sp) +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: sb a0, 1(sp) +; RV32-BITS-UNKNOWN-NEXT: li a0, 64 +; RV32-BITS-UNKNOWN-NEXT: mv a1, sp +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -128 +; RV32-BITS-UNKNOWN-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-BITS-UNKNOWN-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 128 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v64i1: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: addi sp, sp, -128 +; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 128 +; RV32-BITS-256-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-BITS-256-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32-BITS-256-NEXT: .cfi_offset ra, -4 +; RV32-BITS-256-NEXT: .cfi_offset s0, -8 +; RV32-BITS-256-NEXT: addi s0, sp, 128 +; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0 +; RV32-BITS-256-NEXT: andi sp, sp, -64 +; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV32-BITS-256-NEXT: vmv.x.s a0, v0 +; RV32-BITS-256-NEXT: andi a1, a0, 1 +; RV32-BITS-256-NEXT: sb a1, 63(sp) +; RV32-BITS-256-NEXT: srli a1, a0, 31 +; RV32-BITS-256-NEXT: sb a1, 32(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 30 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 62(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 29 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 61(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 60(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 27 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 59(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 58(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 57(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 24 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 56(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 23 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 55(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 22 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 54(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 21 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 53(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 20 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 52(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 19 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 51(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 18 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 50(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 17 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 49(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 16 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 48(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 15 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 47(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 14 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 46(sp) +; 
RV32-BITS-256-NEXT: slli a1, a0, 13 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 45(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 12 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 44(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 11 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 43(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 10 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 42(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 9 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 41(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 8 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 40(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 7 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 39(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 6 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 38(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 5 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 37(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 4 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 36(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 35(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 34(sp) +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: sb a0, 33(sp) +; RV32-BITS-256-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; RV32-BITS-256-NEXT: vslidedown.vi v8, v0, 1 +; RV32-BITS-256-NEXT: vmv.x.s a0, v8 +; RV32-BITS-256-NEXT: andi a1, a0, 1 +; RV32-BITS-256-NEXT: sb a1, 31(sp) +; RV32-BITS-256-NEXT: srli a1, a0, 31 +; RV32-BITS-256-NEXT: sb a1, 0(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 30 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 30(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 29 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 29(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 28(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 27 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 27(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 26(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 25(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 24 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 24(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 23 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 23(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 22 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 22(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 21 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 21(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 20 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 20(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 19 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 19(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 18 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 18(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 17 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 17(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 16 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 16(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 15 +; 
RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 15(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 14 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 14(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 13 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 13(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 12 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 12(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 11 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 11(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 10 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 10(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 9 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 9(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 8 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 8(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 7 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 7(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 6 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 6(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 5 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 5(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 4 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 4(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 3(sp) +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: sb a1, 2(sp) +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: sb a0, 1(sp) +; RV32-BITS-256-NEXT: li a0, 64 +; RV32-BITS-256-NEXT: mv a1, sp +; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; RV32-BITS-256-NEXT: vle8.v v8, (a1) +; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: addi sp, s0, -128 +; RV32-BITS-256-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-BITS-256-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32-BITS-256-NEXT: addi sp, sp, 128 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v64i1: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: addi sp, sp, -128 +; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 128 +; RV32-BITS-512-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-BITS-512-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32-BITS-512-NEXT: .cfi_offset ra, -4 +; RV32-BITS-512-NEXT: .cfi_offset s0, -8 +; RV32-BITS-512-NEXT: addi s0, sp, 128 +; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0 +; RV32-BITS-512-NEXT: andi sp, sp, -64 +; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; RV32-BITS-512-NEXT: vmv.x.s a0, v0 +; RV32-BITS-512-NEXT: andi a1, a0, 1 +; RV32-BITS-512-NEXT: sb a1, 63(sp) +; RV32-BITS-512-NEXT: srli a1, a0, 31 +; RV32-BITS-512-NEXT: sb a1, 32(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 30 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 62(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 61(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 60(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 27 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 59(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 26 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 58(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 
57(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 24 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 56(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 23 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 55(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 22 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 54(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 21 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 53(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 20 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 52(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 19 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 51(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 18 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 50(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 17 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 49(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 16 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 48(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 15 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 47(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 14 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 46(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 13 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 45(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 12 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 44(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 11 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 43(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 10 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 42(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 9 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 41(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 8 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 40(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 7 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 39(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 6 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 38(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 5 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 37(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 4 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 36(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 35(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 34(sp) +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: sb a0, 33(sp) +; RV32-BITS-512-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; RV32-BITS-512-NEXT: vslidedown.vi v8, v0, 1 +; RV32-BITS-512-NEXT: vmv.x.s a0, v8 +; RV32-BITS-512-NEXT: andi a1, a0, 1 +; RV32-BITS-512-NEXT: sb a1, 31(sp) +; RV32-BITS-512-NEXT: srli a1, a0, 31 +; RV32-BITS-512-NEXT: sb a1, 0(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 30 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 30(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 29(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 28(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 27 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 27(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 26 +; 
RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 26(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 25(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 24 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 24(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 23 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 23(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 22 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 22(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 21 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 21(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 20 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 20(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 19 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 19(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 18 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 18(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 17 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 17(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 16 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 16(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 15 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 15(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 14 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 14(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 13 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 13(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 12 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 12(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 11 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 11(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 10 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 10(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 9 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 9(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 8 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 8(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 7 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 7(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 6 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 6(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 5 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 5(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 4 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 4(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 3(sp) +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: sb a1, 2(sp) +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: sb a0, 1(sp) +; RV32-BITS-512-NEXT: li a0, 64 +; RV32-BITS-512-NEXT: mv a1, sp +; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; RV32-BITS-512-NEXT: vle8.v v8, (a1) +; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: addi sp, s0, -128 +; RV32-BITS-512-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-BITS-512-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32-BITS-512-NEXT: addi sp, sp, 128 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v64i1: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -128 +; RV64-BITS-UNKNOWN-NEXT: 
.cfi_def_cfa_offset 128 +; RV64-BITS-UNKNOWN-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64-BITS-UNKNOWN-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8 +; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16 +; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 128 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 +; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64 +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV64-BITS-UNKNOWN-NEXT: andi a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 63(sp) +; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 32(sp) +; RV64-BITS-UNKNOWN-NEXT: srli a1, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 0(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 62(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 61(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 60(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 59(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 58(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 57(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 56(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 55(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 54(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 53(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 52(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 51(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 50(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 49(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 48(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 47(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 46(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 45(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 44(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 43(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 42(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 41(sp) +; RV64-BITS-UNKNOWN-NEXT: 
slli a1, a0, 40 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 40(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 39(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 38(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 37(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 36(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 35(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 34(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 33 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 33(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 31 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 31(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 30(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 29(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 28(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 27(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 26(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 25(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 24(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 23 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 23(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 22(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 21(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 20(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 19(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 18(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 17(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 16(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 15 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 15(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 14 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 13 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 12 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; 
RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 11 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 10 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 9 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 8 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 7 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 7(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 6 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 6(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 5(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 4(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 3(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a1, 2(sp) +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: sb a0, 1(sp) +; RV64-BITS-UNKNOWN-NEXT: li a0, 64 +; RV64-BITS-UNKNOWN-NEXT: mv a1, sp +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -128 +; RV64-BITS-UNKNOWN-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64-BITS-UNKNOWN-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 128 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v64i1: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: addi sp, sp, -128 +; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 128 +; RV64-BITS-256-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64-BITS-256-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64-BITS-256-NEXT: .cfi_offset ra, -8 +; RV64-BITS-256-NEXT: .cfi_offset s0, -16 +; RV64-BITS-256-NEXT: addi s0, sp, 128 +; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0 +; RV64-BITS-256-NEXT: andi sp, sp, -64 +; RV64-BITS-256-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; RV64-BITS-256-NEXT: vmv.x.s a0, v0 +; RV64-BITS-256-NEXT: andi a1, a0, 1 +; RV64-BITS-256-NEXT: sb a1, 63(sp) +; RV64-BITS-256-NEXT: srliw a1, a0, 31 +; RV64-BITS-256-NEXT: sb a1, 32(sp) +; RV64-BITS-256-NEXT: srli a1, a0, 63 +; RV64-BITS-256-NEXT: sb a1, 0(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 62 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 62(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 61(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 60(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 59 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 59(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 58(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 57(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 56 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 56(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 55 +; 
RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 55(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 54 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 54(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 53 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 53(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 52 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 52(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 51 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 51(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 50 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 50(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 49 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 49(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 48 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 48(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 47 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 47(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 46 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 46(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 45 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 45(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 44 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 44(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 43 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 43(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 42 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 42(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 41 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 41(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 40 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 40(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 39 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 39(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 38 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 38(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 37 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 37(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 36 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 36(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 35 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 35(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 34 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 34(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 33 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 33(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 31 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 31(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 30 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 30(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 29 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 29(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 28 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 28(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 27 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 27(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 26 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 26(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 25 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 25(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 24 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 24(sp) +; RV64-BITS-256-NEXT: slli a1, 
a0, 23 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 23(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 22 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 22(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 21 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 21(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 20 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 20(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 19 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 19(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 18 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 18(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 17 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 17(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 16 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 16(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 15 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 15(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 14 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 14(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 13 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 13(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 12 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 12(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 11 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 11(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 10 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 10(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 9 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 9(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 8 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 8(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 7 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 7(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 6 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 6(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 5 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 5(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 4 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 4(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 3(sp) +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: sb a1, 2(sp) +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: srli a0, a0, 63 +; RV64-BITS-256-NEXT: sb a0, 1(sp) +; RV64-BITS-256-NEXT: li a0, 64 +; RV64-BITS-256-NEXT: mv a1, sp +; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; RV64-BITS-256-NEXT: vle8.v v8, (a1) +; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: addi sp, s0, -128 +; RV64-BITS-256-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64-BITS-256-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64-BITS-256-NEXT: addi sp, sp, 128 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v64i1: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: addi sp, sp, -128 +; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 128 +; RV64-BITS-512-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64-BITS-512-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64-BITS-512-NEXT: .cfi_offset ra, -8 +; RV64-BITS-512-NEXT: .cfi_offset s0, -16 +; RV64-BITS-512-NEXT: addi s0, sp, 128 +; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0 +; RV64-BITS-512-NEXT: andi sp, sp, -64 
+; RV64-BITS-512-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; RV64-BITS-512-NEXT: vmv.x.s a0, v0 +; RV64-BITS-512-NEXT: andi a1, a0, 1 +; RV64-BITS-512-NEXT: sb a1, 63(sp) +; RV64-BITS-512-NEXT: srliw a1, a0, 31 +; RV64-BITS-512-NEXT: sb a1, 32(sp) +; RV64-BITS-512-NEXT: srli a1, a0, 63 +; RV64-BITS-512-NEXT: sb a1, 0(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 62 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 62(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 61(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 60(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 59 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 59(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 58(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 57(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 56 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 56(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 55 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 55(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 54 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 54(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 53 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 53(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 52 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 52(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 51 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 51(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 50 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 50(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 49 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 49(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 48 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 48(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 47 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 47(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 46 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 46(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 45 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 45(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 44 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 44(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 43 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 43(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 42 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 42(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 41 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 41(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 40 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 40(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 39 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 39(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 38 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 38(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 37 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 37(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 36 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 36(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 35 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 35(sp) +; 
RV64-BITS-512-NEXT: slli a1, a0, 34 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 34(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 33 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 33(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 31 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 31(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 30 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 30(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 29 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 29(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 28 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 28(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 27 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 27(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 26 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 26(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 25 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 25(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 24 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 24(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 23 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 23(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 22 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 22(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 21 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 21(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 20 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 20(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 19 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 19(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 18 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 18(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 17 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 17(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 16 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 16(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 15 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 15(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 14 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 14(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 13 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 13(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 12 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 12(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 11 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 11(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 10 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 10(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 9 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 9(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 8 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 8(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 7 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 7(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 6 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 6(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 5 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 5(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 4 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 4(sp) +; RV64-BITS-512-NEXT: slli a1, a0, 3 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 3(sp) +; 
RV64-BITS-512-NEXT: slli a1, a0, 2 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: sb a1, 2(sp) +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: srli a0, a0, 63 +; RV64-BITS-512-NEXT: sb a0, 1(sp) +; RV64-BITS-512-NEXT: li a0, 64 +; RV64-BITS-512-NEXT: mv a1, sp +; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; RV64-BITS-512-NEXT: vle8.v v8, (a1) +; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 +; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: addi sp, s0, -128 +; RV64-BITS-512-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64-BITS-512-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64-BITS-512-NEXT: addi sp, sp, 128 +; RV64-BITS-512-NEXT: ret + %res = call <64 x i1> @llvm.experimental.vector.reverse.v64i1(<64 x i1> %a) + ret <64 x i1> %res +} + + +define <1 x i8> @reverse_v1i8(<1 x i8> %a) { +; CHECK-LABEL: reverse_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call <1 x i8> @llvm.experimental.vector.reverse.v1i8(<1 x i8> %a) + ret <1 x i8> %res +} + +define <2 x i8> @reverse_v2i8(<2 x i8> %a) { +; CHECK-LABEL: reverse_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a) + ret <2 x i8> %res +} + +define <4 x i8> @reverse_v4i8(<4 x i8> %a) { +; CHECK-LABEL: reverse_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <4 x i8> @llvm.experimental.vector.reverse.v4i8(<4 x i8> %a) + ret <4 x i8> %res +} + +define <8 x i8> @reverse_v8i8(<8 x i8> %a) { +; CHECK-LABEL: reverse_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 7 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <8 x i8> @llvm.experimental.vector.reverse.v8i8(<8 x i8> %a) + ret <8 x i8> %res +} + +define <16 x i8> @reverse_v16i8(<16 x i8> %a) { +; CHECK-LABEL: reverse_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 15 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a) + ret <16 x i8> %res +} + +define <32 x i8> @reverse_v32i8(<32 x i8> %a) { +; CHECK-LABEL: reverse_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI11_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %res = call <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8> %a) + ret <32 x i8> %res +} + +define <64 x i8> @reverse_v64i8(<64 x i8> %a) { +; CHECK-LABEL: reverse_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0) +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %res = call <64 x i8> @llvm.experimental.vector.reverse.v64i8(<64 x i8> %a) + ret 
<64 x i8> %res +} + +define <1 x i16> @reverse_v1i16(<1 x i16> %a) { +; CHECK-LABEL: reverse_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call <1 x i16> @llvm.experimental.vector.reverse.v1i16(<1 x i16> %a) + ret <1 x i16> %res +} + +define <2 x i16> @reverse_v2i16(<2 x i16> %a) { +; CHECK-LABEL: reverse_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.experimental.vector.reverse.v2i16(<2 x i16> %a) + ret <2 x i16> %res +} + +define <4 x i16> @reverse_v4i16(<4 x i16> %a) { +; CHECK-LABEL: reverse_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <4 x i16> @llvm.experimental.vector.reverse.v4i16(<4 x i16> %a) + ret <4 x i16> %res +} + +define <8 x i16> @reverse_v8i16(<8 x i16> %a) { +; CHECK-LABEL: reverse_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 7 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a) + ret <8 x i16> %res +} + +define <16 x i16> @reverse_v16i16(<16 x i16> %a) { +; CHECK-LABEL: reverse_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 15 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %res = call <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16> %a) + ret <16 x i16> %res +} + +define <32 x i16> @reverse_v32i16(<32 x i16> %a) { +; CHECK-LABEL: reverse_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %res = call <32 x i16> @llvm.experimental.vector.reverse.v32i16(<32 x i16> %a) + ret <32 x i16> %res +} + +define <1 x i32> @reverse_v1i32(<1 x i32> %a) { +; CHECK-LABEL: reverse_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call <1 x i32> @llvm.experimental.vector.reverse.v1i32(<1 x i32> %a) + ret <1 x i32> %res +} + +define <2 x i32> @reverse_v2i32(<2 x i32> %a) { +; CHECK-LABEL: reverse_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <2 x i32> @llvm.experimental.vector.reverse.v2i32(<2 x i32> %a) + ret <2 x i32> %res +} + +define <4 x i32> @reverse_v4i32(<4 x i32> %a) { +; CHECK-LABEL: reverse_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a) + ret <4 x i32> %res +} + +define <8 x i32> @reverse_v8i32(<8 x i32> %a) { +; CHECK-LABEL: reverse_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, 
e32, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a) + ret <8 x i32> %res +} + +define <16 x i32> @reverse_v16i32(<16 x i32> %a) { +; CHECK-LABEL: reverse_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vi v16, v12, 15 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %res = call <16 x i32> @llvm.experimental.vector.reverse.v16i32(<16 x i32> %a) + ret <16 x i32> %res +} + +define <1 x i64> @reverse_v1i64(<1 x i64> %a) { +; CHECK-LABEL: reverse_v1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call <1 x i64> @llvm.experimental.vector.reverse.v1i64(<1 x i64> %a) + ret <1 x i64> %res +} + +define <2 x i64> @reverse_v2i64(<2 x i64> %a) { +; CHECK-LABEL: reverse_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a) + ret <2 x i64> %res +} + +define <4 x i64> @reverse_v4i64(<4 x i64> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v4i64: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v10 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vi v12, v10, 3 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v10 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v4i64: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-BITS-256-NEXT: vid.v v10 +; RV32-BITS-256-NEXT: vrsub.vi v12, v10, 3 +; RV32-BITS-256-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-BITS-256-NEXT: vrgatherei16.vv v10, v8, v12 +; RV32-BITS-256-NEXT: vmv.v.v v8, v10 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v4i64: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-BITS-512-NEXT: vid.v v10 +; RV32-BITS-512-NEXT: vrsub.vi v12, v10, 3 +; RV32-BITS-512-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-BITS-512-NEXT: vrgatherei16.vv v10, v8, v12 +; RV32-BITS-512-NEXT: vmv.v.v v8, v10 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v4i64: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v10 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v12, v10, 3 +; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v10 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v4i64: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-256-NEXT: vid.v v10 +; RV64-BITS-256-NEXT: vrsub.vi v12, v10, 3 +; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-256-NEXT: vmv.v.v v8, v10 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v4i64: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-512-NEXT: vid.v v10 +; RV64-BITS-512-NEXT: vrsub.vi v12, v10, 3 +; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-512-NEXT: vmv.v.v v8, v10 +; RV64-BITS-512-NEXT: ret + %res = call 
<4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64> %a) + ret <4 x i64> %res +} + +define <8 x i64> @reverse_v8i64(<8 x i64> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v8i64: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v12 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vi v16, v12, 7 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v12 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v8i64: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-BITS-256-NEXT: vid.v v12 +; RV32-BITS-256-NEXT: vrsub.vi v16, v12, 7 +; RV32-BITS-256-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-BITS-256-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-BITS-256-NEXT: vmv.v.v v8, v12 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v8i64: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-BITS-512-NEXT: vid.v v12 +; RV32-BITS-512-NEXT: vrsub.vi v16, v12, 7 +; RV32-BITS-512-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-BITS-512-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-BITS-512-NEXT: vmv.v.v v8, v12 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v8i64: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v12 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v16, v12, 7 +; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v12 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v8i64: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-256-NEXT: vid.v v12 +; RV64-BITS-256-NEXT: vrsub.vi v16, v12, 7 +; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-256-NEXT: vmv.v.v v8, v12 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v8i64: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-512-NEXT: vid.v v12 +; RV64-BITS-512-NEXT: vrsub.vi v16, v12, 7 +; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-512-NEXT: vmv.v.v v8, v12 +; RV64-BITS-512-NEXT: ret + %res = call <8 x i64> @llvm.experimental.vector.reverse.v8i64(<8 x i64> %a) + ret <8 x i64> %res +} + + +define <1 x half> @reverse_v1f16(<1 x half> %a) { +; CHECK-LABEL: reverse_v1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call <1 x half> @llvm.experimental.vector.reverse.v1f16(<1 x half> %a) + ret <1 x half> %res +} + +define <2 x half> @reverse_v2f16(<2 x half> %a) { +; CHECK-LABEL: reverse_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <2 x half> @llvm.experimental.vector.reverse.v2f16(<2 x half> %a) + ret <2 x half> %res +} + +define <4 x half> @reverse_v4f16(<4 x half> %a) { +; CHECK-LABEL: reverse_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <4 x half> @llvm.experimental.vector.reverse.v4f16(<4 x half> %a) + ret <4 x half> %res +} + +define <8 x half> @reverse_v8f16(<8 x half> %a) { +; CHECK-LABEL: reverse_v8f16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 7 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a) + ret <8 x half> %res +} + +define <16 x half> @reverse_v16f16(<16 x half> %a) { +; CHECK-LABEL: reverse_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 15 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %res = call <16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half> %a) + ret <16 x half> %res +} + +define <32 x half> @reverse_v32f16(<32 x half> %a) { +; CHECK-LABEL: reverse_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI33_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %res = call <32 x half> @llvm.experimental.vector.reverse.v32f16(<32 x half> %a) + ret <32 x half> %res +} + +define <1 x float> @reverse_v1f32(<1 x float> %a) { +; CHECK-LABEL: reverse_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call <1 x float> @llvm.experimental.vector.reverse.v1f32(<1 x float> %a) + ret <1 x float> %res +} + +define <2 x float> @reverse_v2f32(<2 x float> %a) { +; CHECK-LABEL: reverse_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <2 x float> @llvm.experimental.vector.reverse.v2f32(<2 x float> %a) + ret <2 x float> %res +} + +define <4 x float> @reverse_v4f32(<4 x float> %a) { +; CHECK-LABEL: reverse_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a) + ret <4 x float> %res +} + +define <8 x float> @reverse_v8f32(<8 x float> %a) { +; CHECK-LABEL: reverse_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %res = call <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float> %a) + ret <8 x float> %res +} + +define <16 x float> @reverse_v16f32(<16 x float> %a) { +; CHECK-LABEL: reverse_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vi v16, v12, 15 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a) + ret <16 x float> %res +} + +define <1 x double> @reverse_v1f64(<1 x double> %a) { +; CHECK-LABEL: reverse_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call <1 x double> @llvm.experimental.vector.reverse.v1f64(<1 x double> %a) + ret <1 x double> %res +} + +define <2 x double> @reverse_v2f64(<2 x double> %a) { +; CHECK-LABEL: reverse_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: 
vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a) + ret <2 x double> %res +} + +define <4 x double> @reverse_v4f64(<4 x double> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v4f64: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v10 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vi v12, v10, 3 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v10 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v4f64: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-BITS-256-NEXT: vid.v v10 +; RV32-BITS-256-NEXT: vrsub.vi v12, v10, 3 +; RV32-BITS-256-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-BITS-256-NEXT: vrgatherei16.vv v10, v8, v12 +; RV32-BITS-256-NEXT: vmv.v.v v8, v10 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v4f64: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-BITS-512-NEXT: vid.v v10 +; RV32-BITS-512-NEXT: vrsub.vi v12, v10, 3 +; RV32-BITS-512-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-BITS-512-NEXT: vrgatherei16.vv v10, v8, v12 +; RV32-BITS-512-NEXT: vmv.v.v v8, v10 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v4f64: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v10 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v12, v10, 3 +; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v10 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v4f64: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-256-NEXT: vid.v v10 +; RV64-BITS-256-NEXT: vrsub.vi v12, v10, 3 +; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-256-NEXT: vmv.v.v v8, v10 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v4f64: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-512-NEXT: vid.v v10 +; RV64-BITS-512-NEXT: vrsub.vi v12, v10, 3 +; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-512-NEXT: vmv.v.v v8, v10 +; RV64-BITS-512-NEXT: ret + %res = call <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double> %a) + ret <4 x double> %res +} + +define <8 x double> @reverse_v8f64(<8 x double> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v8f64: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v12 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vi v16, v12, 7 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v12 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v8f64: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-BITS-256-NEXT: vid.v v12 +; RV32-BITS-256-NEXT: vrsub.vi v16, v12, 7 +; RV32-BITS-256-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-BITS-256-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-BITS-256-NEXT: vmv.v.v v8, v12 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v8f64: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetivli zero, 
8, e16, m1, ta, mu +; RV32-BITS-512-NEXT: vid.v v12 +; RV32-BITS-512-NEXT: vrsub.vi v16, v12, 7 +; RV32-BITS-512-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-BITS-512-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-BITS-512-NEXT: vmv.v.v v8, v12 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v8f64: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v12 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v16, v12, 7 +; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v12 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v8f64: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-256-NEXT: vid.v v12 +; RV64-BITS-256-NEXT: vrsub.vi v16, v12, 7 +; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-256-NEXT: vmv.v.v v8, v12 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v8f64: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-512-NEXT: vid.v v12 +; RV64-BITS-512-NEXT: vrsub.vi v16, v12, 7 +; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-512-NEXT: vmv.v.v v8, v12 +; RV64-BITS-512-NEXT: ret + %res = call <8 x double> @llvm.experimental.vector.reverse.v8f64(<8 x double> %a) + ret <8 x double> %res +} + + +define <3 x i64> @reverse_v3i64(<3 x i64> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v3i64: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI43_0) +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI43_0) +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vle32.v v12, (a0) +; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v10, v8, v12 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v10 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v3i64: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI43_0) +; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI43_0) +; RV32-BITS-256-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-BITS-256-NEXT: vle32.v v12, (a0) +; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v12 +; RV32-BITS-256-NEXT: vmv.v.v v8, v10 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v3i64: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI43_0) +; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI43_0) +; RV32-BITS-512-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-BITS-512-NEXT: vle32.v v12, (a0) +; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v12 +; RV32-BITS-512-NEXT: vmv.v.v v8, v10 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v3i64: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v10 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v12, v10, 2 +; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v10 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v3i64: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-256-NEXT: vid.v v10 +; RV64-BITS-256-NEXT: vrsub.vi v12, v10, 2 +; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-256-NEXT: vmv.v.v v8, v10 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v3i64: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-BITS-512-NEXT: vid.v v10 +; RV64-BITS-512-NEXT: vrsub.vi v12, v10, 2 +; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v12 +; RV64-BITS-512-NEXT: vmv.v.v v8, v10 +; 
RV64-BITS-512-NEXT: ret + %res = call <3 x i64> @llvm.experimental.vector.reverse.v3i64(<3 x i64> %a) + ret <3 x i64> %res +} + +define <6 x i64> @reverse_v6i64(<6 x i64> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v6i64: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI44_0) +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI44_0) +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vle32.v v16, (a0) +; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v12, v8, v16 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v12 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v6i64: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI44_0) +; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI44_0) +; RV32-BITS-256-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-BITS-256-NEXT: vle32.v v16, (a0) +; RV32-BITS-256-NEXT: vrgather.vv v12, v8, v16 +; RV32-BITS-256-NEXT: vmv.v.v v8, v12 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v6i64: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI44_0) +; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI44_0) +; RV32-BITS-512-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-BITS-512-NEXT: vle32.v v16, (a0) +; RV32-BITS-512-NEXT: vrgather.vv v12, v8, v16 +; RV32-BITS-512-NEXT: vmv.v.v v8, v12 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v6i64: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v12 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v16, v12, 5 +; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v12 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v6i64: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-256-NEXT: vid.v v12 +; RV64-BITS-256-NEXT: vrsub.vi v16, v12, 5 +; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-256-NEXT: vmv.v.v v8, v12 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v6i64: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-BITS-512-NEXT: vid.v v12 +; RV64-BITS-512-NEXT: vrsub.vi v16, v12, 5 +; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v16 +; RV64-BITS-512-NEXT: vmv.v.v v8, v12 +; RV64-BITS-512-NEXT: ret + %res = call <6 x i64> @llvm.experimental.vector.reverse.v6i64(<6 x i64> %a) + ret <6 x i64> %res +} + +define <12 x i64> @reverse_v12i64(<12 x i64> %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_v12i64: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI45_0) +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI45_0) +; RV32-BITS-UNKNOWN-NEXT: li a1, 32 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vle32.v v24, (a0) +; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v16, v8, v24 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_v12i64: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI45_0) +; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI45_0) +; RV32-BITS-256-NEXT: li a1, 32 +; RV32-BITS-256-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-BITS-256-NEXT: vle32.v v24, (a0) +; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v24 +; RV32-BITS-256-NEXT: vmv.v.v v8, v16 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_v12i64: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI45_0) +; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI45_0) +; RV32-BITS-512-NEXT: li a1, 32 +; 
RV32-BITS-512-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-BITS-512-NEXT: vle32.v v24, (a0) +; RV32-BITS-512-NEXT: vrgather.vv v16, v8, v24 +; RV32-BITS-512-NEXT: vmv.v.v v8, v16 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_v12i64: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v24, v16, 11 +; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v16, v8, v24 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_v12i64: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-BITS-256-NEXT: vid.v v16 +; RV64-BITS-256-NEXT: vrsub.vi v24, v16, 11 +; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v24 +; RV64-BITS-256-NEXT: vmv.v.v v8, v16 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_v12i64: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-BITS-512-NEXT: vid.v v16 +; RV64-BITS-512-NEXT: vrsub.vi v24, v16, 11 +; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v24 +; RV64-BITS-512-NEXT: vmv.v.v v8, v16 +; RV64-BITS-512-NEXT: ret + %res = call <12 x i64> @llvm.experimental.vector.reverse.v12i64(<12 x i64> %a) + ret <12 x i64> %res +} + +declare <2 x i1> @llvm.experimental.vector.reverse.v2i1(<2 x i1>) +declare <4 x i1> @llvm.experimental.vector.reverse.v4i1(<4 x i1>) +declare <8 x i1> @llvm.experimental.vector.reverse.v8i1(<8 x i1>) +declare <16 x i1> @llvm.experimental.vector.reverse.v16i1(<16 x i1>) +declare <32 x i1> @llvm.experimental.vector.reverse.v32i1(<32 x i1>) +declare <64 x i1> @llvm.experimental.vector.reverse.v64i1(<64 x i1>) +declare <1 x i8> @llvm.experimental.vector.reverse.v1i8(<1 x i8>) +declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>) +declare <4 x i8> @llvm.experimental.vector.reverse.v4i8(<4 x i8>) +declare <8 x i8> @llvm.experimental.vector.reverse.v8i8(<8 x i8>) +declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>) +declare <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8>) +declare <64 x i8> @llvm.experimental.vector.reverse.v64i8(<64 x i8>) +declare <1 x i16> @llvm.experimental.vector.reverse.v1i16(<1 x i16>) +declare <2 x i16> @llvm.experimental.vector.reverse.v2i16(<2 x i16>) +declare <4 x i16> @llvm.experimental.vector.reverse.v4i16(<4 x i16>) +declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>) +declare <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16>) +declare <32 x i16> @llvm.experimental.vector.reverse.v32i16(<32 x i16>) +declare <1 x i32> @llvm.experimental.vector.reverse.v1i32(<1 x i32>) +declare <2 x i32> @llvm.experimental.vector.reverse.v2i32(<2 x i32>) +declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>) +declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>) +declare <16 x i32> @llvm.experimental.vector.reverse.v16i32(<16 x i32>) +declare <1 x i64> @llvm.experimental.vector.reverse.v1i64(<1 x i64>) +declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>) +declare <4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64>) +declare <8 x i64> @llvm.experimental.vector.reverse.v8i64(<8 x i64>) +declare <1 x half> @llvm.experimental.vector.reverse.v1f16(<1 x half>) +declare <2 x half> @llvm.experimental.vector.reverse.v2f16(<2 x half>) +declare <4 x half> @llvm.experimental.vector.reverse.v4f16(<4 x half>) +declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>) +declare 
<16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half>) +declare <32 x half> @llvm.experimental.vector.reverse.v32f16(<32 x half>) +declare <1 x float> @llvm.experimental.vector.reverse.v1f32(<1 x float>) +declare <2 x float> @llvm.experimental.vector.reverse.v2f32(<2 x float>) +declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>) +declare <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float>) +declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>) +declare <1 x double> @llvm.experimental.vector.reverse.v1f64(<1 x double>) +declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>) +declare <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double>) +declare <8 x double> @llvm.experimental.vector.reverse.v8f64(<8 x double>) +declare <3 x i64> @llvm.experimental.vector.reverse.v3i64(<3 x i64>) +declare <6 x i64> @llvm.experimental.vector.reverse.v6i64(<6 x i64>) +declare <12 x i64> @llvm.experimental.vector.reverse.v12i64(<12 x i64>) Index: llvm/test/CodeGen/RISCV/shuffle-reverse.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/shuffle-reverse.ll @@ -0,0 +1,658 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+f,+zfh,+experimental-zvfh,+d -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+f,+zfh,+experimental-zvfh,+d -verify-machineinstrs | FileCheck %s + +define <2 x i8> @v2i8(<2 x i8> %a) { +; CHECK-LABEL: v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v2i8 = shufflevector <2 x i8> %a, <2 x i8> undef, <2 x i32> + ret <2 x i8> %v2i8 +} + +define <4 x i8> @v2i8_2(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: v2i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: ret + %v4i8 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> + ret <4 x i8> %v4i8 +} + +define <4 x i8> @v4i8(<4 x i8> %a) { +; CHECK-LABEL: v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v4i8 = shufflevector <4 x i8> %a, <4 x i8> undef, <4 x i32> + ret <4 x i8> %v4i8 +} + +define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: v4i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrsub.vi v12, v11, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %v8i8 = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> + ret <8 x i8> %v8i8 +} + +define <8 x i8> @v8i8(<8 x i8> %a) { +; CHECK-LABEL: v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, 
e8, mf2, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 7 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v8i8 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %v8i8 +} + +define <16 x i8> @v8i8_2(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: v8i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrsub.vi v12, v11, 15 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrsub.vi v8, v11, 7 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v16i8 = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> + ret <16 x i8> %v16i8 +} + +define <16 x i8> @v16i8(<16 x i8> %a) { +; CHECK-LABEL: v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 15 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %v16i8 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %v16i8 +} + +define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) { + %v32i8 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> + ret <32 x i8> %v32i8 +} + +define <2 x i16> @v2i16(<2 x i16> %a) { +; CHECK-LABEL: v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v2i16 = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> + ret <2 x i16> %v2i16 +} + +define <4 x i16> @v2i16_2(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: v2i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: ret + %v4i16 = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> + ret <4 x i16> %v4i16 +} + +define <4 x i16> @v4i16(<4 x i16> %a) { +; CHECK-LABEL: v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v4i16 = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %v4i16 +} + +define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: v4i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrsub.vi v12, v11, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v8i16 = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> + ret <8 x i16> %v8i16 +} + +define <8 x i16> @v8i16(<8 x i16> %a) { +; CHECK-LABEL: v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 7 +; 
CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %v8i16 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %v8i16 +} + +define <16 x i16> @v8i16_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: v8i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v9 +; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vid.v v14 +; CHECK-NEXT: vrsub.vi v16, v14, 15 +; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v14, 7 +; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v16i16 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> + ret <16 x i16> %v16i16 +} + +define <16 x i16> @v16i16(<16 x i16> %a) { +; CHECK-LABEL: v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 15 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v16i16 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> + ret <16 x i16> %v16i16 +} + +define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { + %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> + ret <32 x i16> %v32i16 +} + +define <2 x i32> @v2i32(<2 x i32> %a) { +; CHECK-LABEL: v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v2i32 = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> + ret <2 x i32> %v2i32 +} + +define <4 x i32> @v2i32_2(<2 x i32> %a, < 2 x i32> %b) { +; CHECK-LABEL: v2i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: ret + %v4i32 = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> + ret <4 x i32> %v4i32 +} + +define <4 x i32> @v4i32(<4 x i32> %a) { +; CHECK-LABEL: v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %v4i32 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %v4i32 +} + +define <8 x i32> @v4i32_2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: v4i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v9 +; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vid.v v14 +; CHECK-NEXT: vrsub.vi v16, v14, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v14, 3 +; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v8i32 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> + ret <8 x i32> %v8i32 +} + +define <8 x i32> @v8i32(<8 x i32> %a) { +; CHECK-LABEL: v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; 
CHECK-NEXT: vrsub.vi v12, v10, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v8i32 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> + ret <8 x i32> %v8i32 +} + +define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: v8i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv2r.v v16, v10 +; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vid.v v20 +; CHECK-NEXT: vrsub.vi v24, v20, 15 +; CHECK-NEXT: vrgather.vv v12, v8, v24 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v20, 7 +; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %v16i32 = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> + ret <16 x i32> %v16i32 +} + +define <16 x i32> @v16i32(<16 x i32> %a) { +; CHECK-LABEL: v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vi v16, v12, 15 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %v16i32 = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> + ret <16 x i32> %v16i32 +} + +define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) { + %v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> + ret <32 x i32> %v32i32 +} + +define <2 x i64> @v2i64(<2 x i64> %a) { +; CHECK-LABEL: v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v2i64 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + ret <2 x i64> %v2i64 +} + +define <4 x i64> @v2i64_2(<2 x i64> %a, < 2 x i64> %b) { +; CHECK-LABEL: v2i64_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v12, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v9, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v10, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vi v10, v12, 2 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v4i64 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> + ret <4 x i64> %v4i64 +} + +define <4 x i64> @v4i64(<4 x i64> %a) { + %v4i64 = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> + ret <4 x i64> %v4i64 +} + +define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) { + %v8i64 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> + ret <8 x i64> %v8i64 +} + +define <2 x half> @v2f16(<2 x half> %a) { +; CHECK-LABEL: v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v2f16 = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> + ret <2 x half> %v2f16 +} + +define <4 x half> @v2f16_2(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: v2f16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v9, 1 +; CHECK-NEXT: 
vsetivli zero, 2, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: ret + %v4f16 = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> + ret <4 x half> %v4f16 +} + +define <4 x half> @v4f16(<4 x half> %a) { +; CHECK-LABEL: v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v4f16 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> + ret <4 x half> %v4f16 +} + +define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: v4f16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrsub.vi v12, v11, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v8f16 = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> + ret <8 x half> %v8f16 +} + +define <8 x half> @v8f16(<8 x half> %a) { +; CHECK-LABEL: v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 7 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %v8f16 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> + ret <8 x half> %v8f16 +} + +define <16 x half> @v8f16_2(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: v8f16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v9 +; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vid.v v14 +; CHECK-NEXT: vrsub.vi v16, v14, 15 +; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v14, 7 +; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v16f16 = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> + ret <16 x half> %v16f16 +} + +define <16 x half> @v16f16(<16 x half> %a) { +; CHECK-LABEL: v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 15 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v16f16 = shufflevector <16 x half> %a, <16 x half> undef, <16 x i32> + ret <16 x half> %v16f16 +} + +define <32 x half> @v16f16_2(<16 x half> %a) { +; CHECK-LABEL: v16f16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetivli zero, 16, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vi v16, v8, 0 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vrgather.vv v8, v16, v12 +; CHECK-NEXT: ret + %v32f16 = shufflevector <16 x half> %a, <16 x half> undef, <32 x i32> + ret <32 x half> %v32f16 +} + +define <2 x float> @v2f32(<2 x float> %a) { +; CHECK-LABEL: v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; 
CHECK-NEXT: ret + %v2f32 = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> + ret <2 x float> %v2f32 +} + +define <4 x float> @v2f32_2(<2 x float> %a, <2 x float> %b) { +; CHECK-LABEL: v2f32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v10, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: ret + %v4f32 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> + ret <4 x float> %v4f32 +} + +define <4 x float> @v4f32(<4 x float> %a) { +; CHECK-LABEL: v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 3 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %v4f32 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + ret <4 x float> %v4f32 +} + +define <8 x float> @v4f32_2(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: v4f32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v9 +; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vid.v v14 +; CHECK-NEXT: vrsub.vi v16, v14, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v14, 3 +; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v8f32 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> + ret <8 x float> %v8f32 +} + +define <8 x float> @v8f32(<8 x float> %a) { +; CHECK-LABEL: v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 7 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v8f32 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> + ret <8 x float> %v8f32 +} + +define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: v8f32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv2r.v v16, v10 +; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vid.v v20 +; CHECK-NEXT: vrsub.vi v24, v20, 15 +; CHECK-NEXT: vrgather.vv v12, v8, v24 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrsub.vi v8, v20, 7 +; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %v16f32 = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> + ret <16 x float> %v16f32 +} + +define <2 x double> @v2f64(<2 x double> %a) { +; CHECK-LABEL: v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v2f64 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> + ret <2 x double> %v2f64 +} + +define <4 x double> @v2f64_2(<2 x double> %a, < 2 x double> %b) { +; CHECK-LABEL: v2f64_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v12, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e64, m1, 
ta, mu +; CHECK-NEXT: vslidedown.vi v10, v9, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v10, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vi v10, v12, 2 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v4f64 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> + ret <4 x double> %v4f64 +} + +define <4 x double> @v4f64(<4 x double> %a) { + %v4f64 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> + ret <4 x double> %v4f64 +} + +define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) { + %v8f64 = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> + ret <8 x double> %v8f64 +} + +define <32 x i8> @v32i8(<32 x i8> %a) { +; CHECK-LABEL: v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI46_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v32i8 = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> + ret <32 x i8> %v32i8 +} +