diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4228,6 +4228,8 @@
     unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
     MVT ViaVT = MVT::getVectorVT(MVT::getIntegerVT(ViaEltSize), 1);
+    if (!ViaVT.isValid())
+      return SDValue();
     MVT ViaBitVT = MVT::getVectorVT(MVT::i1, ViaVT.getScalarSizeInBits());
 
     // If we don't have zvbb or the larger element type > ELEN, the operation
     // will be illegal.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -2,9 +2,11 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV32-BITS-UNKNOWN
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV32-BITS-256
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV32-BITS-512
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=1024 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV32-BITS-1024
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV64-BITS-UNKNOWN
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV64-BITS-256
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV64-BITS-512
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=1024 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV64-BITS-1024
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV32-ZVBB
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV64-ZVBB
@@ -160,6 +162,41 @@
   ret <128 x i1> %res
 }
 
+define <256 x i1> @reverse_v256i1(<256 x i1> %a) {
+; CHECK-LABEL: reverse_v256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
+; CHECK-NEXT: vrgather.vv v0, v8, v16
+; CHECK-NEXT: vmsne.vi v10, v0, 0
+; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vim v24, v24, 1, v0
+; CHECK-NEXT: vrgather.vv v0, v24, v16
+; CHECK-NEXT: vmsne.vi v8, v0, 0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+  %res = call <256 x i1> @llvm.experimental.vector.reverse.v256i1(<256 x i1> %a)
+  ret <256 x i1> %res
+}
+
 define <1 x i8> @reverse_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: reverse_v1i8:
 ; CHECK: # %bb.0:
@@ -228,8 +265,8 @@
 define <32 x i8> @reverse_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: reverse_v32i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI13_0)
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vle8.v v12, (a0)
@@ -243,8 +280,8 @@
 define <64 x i8> @reverse_v64i8(<64 x i8> %a) {
 ; CHECK-LABEL: reverse_v64i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI13_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_0)
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT: vle8.v v16, (a0)
@@ -323,8 +360,8 @@
 define <32 x i16> @reverse_v32i16(<32 x i16> %a) {
 ; CHECK-LABEL: reverse_v32i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI20_0)
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v16, (a0)
@@ -451,6 +488,16 @@
 ; RV32-BITS-512-NEXT: vmv.v.v v8, v10
 ; RV32-BITS-512-NEXT: ret
 ;
+; RV32-BITS-1024-LABEL: reverse_v4i64:
+; RV32-BITS-1024: # %bb.0:
+; RV32-BITS-1024-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-BITS-1024-NEXT: vid.v v10
+; RV32-BITS-1024-NEXT: vrsub.vi v12, v10, 3
+; RV32-BITS-1024-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32-BITS-1024-NEXT: vrgatherei16.vv v10, v8, v12
+; RV32-BITS-1024-NEXT: vmv.v.v v8, v10
+; RV32-BITS-1024-NEXT: ret
+;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v4i64:
 ; RV64-BITS-UNKNOWN: # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -478,6 +525,15 @@
 ; RV64-BITS-512-NEXT: vmv.v.v v8, v10
 ; RV64-BITS-512-NEXT: ret
 ;
+; RV64-BITS-1024-LABEL: reverse_v4i64:
+; RV64-BITS-1024: # %bb.0:
+; RV64-BITS-1024-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-BITS-1024-NEXT: vid.v v10
+; RV64-BITS-1024-NEXT: vrsub.vi v12, v10, 3
+; RV64-BITS-1024-NEXT: vrgather.vv v10, v8, v12
+; RV64-BITS-1024-NEXT: vmv.v.v v8, v10
+; RV64-BITS-1024-NEXT: ret
+;
 ; RV32-ZVBB-LABEL: reverse_v4i64:
 ; RV32-ZVBB: # %bb.0:
 ; RV32-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
@@ -531,6 +587,16 @@
 ; RV32-BITS-512-NEXT: vmv.v.v v8, v12
 ; RV32-BITS-512-NEXT: ret
 ;
+; RV32-BITS-1024-LABEL: reverse_v8i64:
+; RV32-BITS-1024: # %bb.0:
+; RV32-BITS-1024-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-BITS-1024-NEXT: vid.v v12
+; RV32-BITS-1024-NEXT: vrsub.vi v16, v12, 7
+; RV32-BITS-1024-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV32-BITS-1024-NEXT: vrgatherei16.vv v12, v8, v16
+; RV32-BITS-1024-NEXT: vmv.v.v v8, v12
+; RV32-BITS-1024-NEXT: ret
+;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v8i64:
 ; RV64-BITS-UNKNOWN: # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -558,6 +624,15 @@
 ; RV64-BITS-512-NEXT: vmv.v.v v8, v12
 ; RV64-BITS-512-NEXT: ret
 ;
+; RV64-BITS-1024-LABEL: reverse_v8i64:
+; RV64-BITS-1024: # %bb.0:
+; RV64-BITS-1024-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BITS-1024-NEXT: vid.v v12
+; RV64-BITS-1024-NEXT: vrsub.vi v16, v12, 7
+; RV64-BITS-1024-NEXT: vrgather.vv v12, v8, v16
+; RV64-BITS-1024-NEXT: vmv.v.v v8, v12
+; RV64-BITS-1024-NEXT: ret
+;
 ; RV32-ZVBB-LABEL: reverse_v8i64:
 ; RV32-ZVBB: # %bb.0:
 ; RV32-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -649,8 +724,8 @@
 define <32 x half> @reverse_v32f16(<32 x half> %a) {
 ; CHECK-LABEL: reverse_v32f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI34_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI35_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v16, (a0)
@@ -777,6 +852,16 @@
 ; RV32-BITS-512-NEXT: vmv.v.v v8, v10
 ; RV32-BITS-512-NEXT: ret
 ;
+; RV32-BITS-1024-LABEL: reverse_v4f64:
+; RV32-BITS-1024: # %bb.0:
+; RV32-BITS-1024-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-BITS-1024-NEXT: vid.v v10
+; RV32-BITS-1024-NEXT: vrsub.vi v12, v10, 3
+; RV32-BITS-1024-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32-BITS-1024-NEXT: vrgatherei16.vv v10, v8, v12
+; RV32-BITS-1024-NEXT: vmv.v.v v8, v10
+; RV32-BITS-1024-NEXT: ret
+;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v4f64:
 ; RV64-BITS-UNKNOWN: # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -804,6 +889,15 @@
 ; RV64-BITS-512-NEXT: vmv.v.v v8, v10
 ; RV64-BITS-512-NEXT: ret
 ;
+; RV64-BITS-1024-LABEL: reverse_v4f64:
+; RV64-BITS-1024: # %bb.0:
+; RV64-BITS-1024-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-BITS-1024-NEXT: vid.v v10
+; RV64-BITS-1024-NEXT: vrsub.vi v12, v10, 3
+; RV64-BITS-1024-NEXT: vrgather.vv v10, v8, v12
+; RV64-BITS-1024-NEXT: vmv.v.v v8, v10
+; RV64-BITS-1024-NEXT: ret
+;
 ; RV32-ZVBB-LABEL: reverse_v4f64:
 ; RV32-ZVBB: # %bb.0:
 ; RV32-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
@@ -857,6 +951,16 @@
 ; RV32-BITS-512-NEXT: vmv.v.v v8, v12
 ; RV32-BITS-512-NEXT: ret
 ;
+; RV32-BITS-1024-LABEL: reverse_v8f64:
+; RV32-BITS-1024: # %bb.0:
+; RV32-BITS-1024-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-BITS-1024-NEXT: vid.v v12
+; RV32-BITS-1024-NEXT: vrsub.vi v16, v12, 7
+; RV32-BITS-1024-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV32-BITS-1024-NEXT: vrgatherei16.vv v12, v8, v16
+; RV32-BITS-1024-NEXT: vmv.v.v v8, v12
+; RV32-BITS-1024-NEXT: ret
+;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v8f64:
 ; RV64-BITS-UNKNOWN: # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -884,6 +988,15 @@
 ; RV64-BITS-512-NEXT: vmv.v.v v8, v12
 ; RV64-BITS-512-NEXT: ret
 ;
+; RV64-BITS-1024-LABEL: reverse_v8f64:
+; RV64-BITS-1024: # %bb.0:
+; RV64-BITS-1024-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BITS-1024-NEXT: vid.v v12
+; RV64-BITS-1024-NEXT: vrsub.vi v16, v12, 7
+; RV64-BITS-1024-NEXT: vrgather.vv v12, v8, v16
+; RV64-BITS-1024-NEXT: vmv.v.v v8, v12
+; RV64-BITS-1024-NEXT: ret
+;
 ; RV32-ZVBB-LABEL: reverse_v8f64:
 ; RV32-ZVBB: # %bb.0:
 ; RV32-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -910,8 +1023,8 @@
 define <3 x i64> @reverse_v3i64(<3 x i64> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v3i64:
 ; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI44_0)
-; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI44_0)
+; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI45_0)
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI45_0)
 ; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT: vle32.v v12, (a0)
 ; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v10, v8, v12
@@ -920,8 +1033,8 @@
 ;
 ; RV32-BITS-256-LABEL: reverse_v3i64:
 ; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI44_0)
-; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI44_0)
+; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI45_0)
+; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI45_0)
 ; RV32-BITS-256-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-BITS-256-NEXT: vle32.v v12, (a0)
 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v12
@@ -930,14 +1043,24 @@
 ;
 ; RV32-BITS-512-LABEL: reverse_v3i64:
 ; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI44_0)
-; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI44_0)
+; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI45_0)
+; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI45_0)
 ; RV32-BITS-512-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-BITS-512-NEXT: vle32.v v12, (a0)
 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v12
 ; RV32-BITS-512-NEXT: vmv.v.v v8, v10
 ; RV32-BITS-512-NEXT: ret
 ;
+; RV32-BITS-1024-LABEL: reverse_v3i64:
+; RV32-BITS-1024: # %bb.0:
+; RV32-BITS-1024-NEXT: lui a0, %hi(.LCPI45_0)
+; RV32-BITS-1024-NEXT: addi a0, a0, %lo(.LCPI45_0)
+; RV32-BITS-1024-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-BITS-1024-NEXT: vle32.v v12, (a0)
+; RV32-BITS-1024-NEXT: vrgather.vv v10, v8, v12
+; RV32-BITS-1024-NEXT: vmv.v.v v8, v10
+; RV32-BITS-1024-NEXT: ret
+;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v3i64:
 ; RV64-BITS-UNKNOWN: # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -965,10 +1088,19 @@
 ; RV64-BITS-512-NEXT: vmv.v.v v8, v10
 ; RV64-BITS-512-NEXT: ret
 ;
+; RV64-BITS-1024-LABEL: reverse_v3i64:
+; RV64-BITS-1024: # %bb.0:
+; RV64-BITS-1024-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-BITS-1024-NEXT: vid.v v10
+; RV64-BITS-1024-NEXT: vrsub.vi v12, v10, 2
+; RV64-BITS-1024-NEXT: vrgather.vv v10, v8, v12
+; RV64-BITS-1024-NEXT: vmv.v.v v8, v10
+; RV64-BITS-1024-NEXT: ret
+;
 ; RV32-ZVBB-LABEL: reverse_v3i64:
 ; RV32-ZVBB: # %bb.0:
-; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI44_0)
-; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI44_0)
+; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI45_0)
+; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI45_0)
 ; RV32-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-ZVBB-NEXT: vle32.v v12, (a0)
 ; RV32-ZVBB-NEXT: vrgather.vv v10, v8, v12
@@ -990,8 +1122,8 @@
 define <6 x i64> @reverse_v6i64(<6 x i64> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v6i64:
 ; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI45_0)
-; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI45_0)
+; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI46_0)
 ; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT: vle32.v v16, (a0)
 ; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v12, v8, v16
@@ -1000,8 +1132,8 @@
 ;
 ; RV32-BITS-256-LABEL: reverse_v6i64:
 ; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI45_0)
-; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI45_0)
+; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI46_0)
 ; RV32-BITS-256-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-BITS-256-NEXT: vle32.v v16, (a0)
 ; RV32-BITS-256-NEXT: vrgather.vv v12, v8, v16
@@ -1010,14 +1142,24 @@
 ;
 ; RV32-BITS-512-LABEL: reverse_v6i64:
 ; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI45_0)
-; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI45_0)
+; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI46_0)
 ; RV32-BITS-512-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-BITS-512-NEXT: vle32.v v16, (a0)
 ; RV32-BITS-512-NEXT: vrgather.vv v12, v8, v16
 ; RV32-BITS-512-NEXT: vmv.v.v v8, v12
 ; RV32-BITS-512-NEXT: ret
 ;
+; RV32-BITS-1024-LABEL: reverse_v6i64:
+; RV32-BITS-1024: # %bb.0:
+; RV32-BITS-1024-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32-BITS-1024-NEXT: addi a0, a0, %lo(.LCPI46_0)
+; RV32-BITS-1024-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-BITS-1024-NEXT: vle32.v v16, (a0)
+; RV32-BITS-1024-NEXT: vrgather.vv v12, v8, v16
+; RV32-BITS-1024-NEXT: vmv.v.v v8, v12
+; RV32-BITS-1024-NEXT: ret
+;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v6i64:
 ; RV64-BITS-UNKNOWN: # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -1045,10 +1187,19 @@
 ; RV64-BITS-512-NEXT: vmv.v.v v8, v12
 ; RV64-BITS-512-NEXT: ret
 ;
+; RV64-BITS-1024-LABEL: reverse_v6i64:
+; RV64-BITS-1024: # %bb.0:
+; RV64-BITS-1024-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BITS-1024-NEXT: vid.v v12
+; RV64-BITS-1024-NEXT: vrsub.vi v16, v12, 5
+; RV64-BITS-1024-NEXT: vrgather.vv v12, v8, v16
+; RV64-BITS-1024-NEXT: vmv.v.v v8, v12
+; RV64-BITS-1024-NEXT: ret
+;
 ; RV32-ZVBB-LABEL: reverse_v6i64:
 ; RV32-ZVBB: # %bb.0:
-; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI45_0)
-; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI45_0)
+; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI46_0)
 ; RV32-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-ZVBB-NEXT: vle32.v v16, (a0)
 ; RV32-ZVBB-NEXT: vrgather.vv v12, v8, v16
@@ -1070,8 +1221,8 @@
 define <12 x i64> @reverse_v12i64(<12 x i64> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v12i64:
 ; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI46_0)
-; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI46_0)
+; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI47_0)
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI47_0)
 ; RV32-BITS-UNKNOWN-NEXT: li a1, 32
 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-BITS-UNKNOWN-NEXT: vle32.v v24, (a0)
@@ -1081,8 +1232,8 @@
 ;
 ; RV32-BITS-256-LABEL: reverse_v12i64:
 ; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI46_0)
-; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI46_0)
+; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI47_0)
+; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI47_0)
 ; RV32-BITS-256-NEXT: li a1, 32
 ; RV32-BITS-256-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-BITS-256-NEXT: vle32.v v24, (a0)
@@ -1092,8 +1243,8 @@
 ;
 ; RV32-BITS-512-LABEL: reverse_v12i64:
 ; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI46_0)
-; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI46_0)
+; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI47_0)
+; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI47_0)
 ; RV32-BITS-512-NEXT: li a1, 32
 ; RV32-BITS-512-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-BITS-512-NEXT: vle32.v v24, (a0)
@@ -1101,6 +1252,17 @@
 ; RV32-BITS-512-NEXT: vmv.v.v v8, v16
 ; RV32-BITS-512-NEXT: ret
 ;
+; RV32-BITS-1024-LABEL: reverse_v12i64:
+; RV32-BITS-1024: # %bb.0:
+; RV32-BITS-1024-NEXT: lui a0, %hi(.LCPI47_0)
+; RV32-BITS-1024-NEXT: addi a0, a0, %lo(.LCPI47_0)
+; RV32-BITS-1024-NEXT: li a1, 32
+; RV32-BITS-1024-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-BITS-1024-NEXT: vle32.v v24, (a0)
+; RV32-BITS-1024-NEXT: vrgather.vv v16, v8, v24
+; RV32-BITS-1024-NEXT: vmv.v.v v8, v16
+; RV32-BITS-1024-NEXT: ret
+;
 ; RV64-BITS-UNKNOWN-LABEL: reverse_v12i64:
 ; RV64-BITS-UNKNOWN: # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
@@ -1128,10 +1290,19 @@
 ; RV64-BITS-512-NEXT: vmv.v.v v8, v16
 ; RV64-BITS-512-NEXT: ret
 ;
+; RV64-BITS-1024-LABEL: reverse_v12i64:
+; RV64-BITS-1024: # %bb.0:
+; RV64-BITS-1024-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-BITS-1024-NEXT: vid.v v16
+; RV64-BITS-1024-NEXT: vrsub.vi v24, v16, 11
+; RV64-BITS-1024-NEXT: vrgather.vv v16, v8, v24
+; RV64-BITS-1024-NEXT: vmv.v.v v8, v16
+; RV64-BITS-1024-NEXT: ret
+;
 ; RV32-ZVBB-LABEL: reverse_v12i64:
 ; RV32-ZVBB: # %bb.0:
-; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI46_0)
-; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI46_0)
+; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI47_0)
+; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI47_0)
 ; RV32-ZVBB-NEXT: li a1, 32
 ; RV32-ZVBB-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-ZVBB-NEXT: vle32.v v24, (a0)
@@ -1158,6 +1329,7 @@
 declare <32 x i1> @llvm.experimental.vector.reverse.v32i1(<32 x i1>)
 declare <64 x i1> @llvm.experimental.vector.reverse.v64i1(<64 x i1>)
 declare <128 x i1> @llvm.experimental.vector.reverse.v128i1(<128 x i1>)
+declare <256 x i1> @llvm.experimental.vector.reverse.v256i1(<256 x i1>)
 declare <1 x i8> @llvm.experimental.vector.reverse.v1i8(<1 x i8>)
 declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
 declare <4 x i8> @llvm.experimental.vector.reverse.v4i8(<4 x i8>)
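Reviewer note (commentary, not part of the patch): this lowering packs an <N x i1> reverse into a single element of an N-bit integer type (at least 8 bits, rounded up to a power of two) so it can be reversed in one shot, per the zvbb comment above. Simple integer MVTs currently top out at i128, so for v256i1 MVT::getIntegerVT(256) has no valid simple type and the new isValid() guard returns SDValue() instead of carrying an invalid ViaVT further into the lowering; the generic fallback is what the new reverse_v256i1 CHECK lines capture. A minimal standalone C++ sketch of the size arithmetic (the two helpers are illustrative stand-ins, not LLVM APIs, and the i8..i128 width set is an assumption about the simple-type table):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Rounds V up to the next power of two; mirrors llvm::PowerOf2Ceil for the
// small values used here.
static uint64_t powerOf2Ceil(uint64_t V) {
  uint64_t P = 1;
  while (P < V)
    P <<= 1;
  return P;
}

// Stand-in (assumption) for "MVT::getIntegerVT(Bits) is valid": treats the
// relevant simple integer types as the power-of-two widths i8 through i128.
static bool hasSimpleIntegerVT(uint64_t Bits) {
  return Bits >= 8 && Bits <= 128 && (Bits & (Bits - 1)) == 0;
}

int main() {
  for (uint64_t NumElts : {32u, 64u, 128u, 256u}) {
    // Same computation as the patched code above.
    uint64_t ViaEltSize = std::max<uint64_t>(8, powerOf2Ceil(NumElts));
    std::cout << "v" << NumElts << "i1 -> i" << ViaEltSize
              << (hasSimpleIntegerVT(ViaEltSize) ? " (valid, widen)\n"
                                                 : " (invalid, bail out)\n");
  }
}

Running this prints a valid container type up through v128i1 and flags v256i1 as the case that must take the early return, which matches the boundary the added test pins down.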