diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -360,18 +360,28 @@
   unsigned getSEW() const { return SEW; }
   RISCVII::VLMUL getVLMUL() const { return VLMul; }
 
-  bool hasNonZeroAVL() const {
+  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
     if (hasAVLImm())
       return getAVLImm() > 0;
-    if (hasAVLReg())
-      return getAVLReg() == RISCV::X0;
+    if (hasAVLReg()) {
+      if (getAVLReg() == RISCV::X0)
+        return true;
+      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
+          MI && MI->getOpcode() == RISCV::ADDI &&
+          MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
+          MI->getOperand(1).getReg() == RISCV::X0 &&
+          MI->getOperand(2).getImm() != 0)
+        return true;
+      return false;
+    }
     return false;
   }
 
-  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
+  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
+                         const MachineRegisterInfo &MRI) const {
     if (hasSameAVL(Other))
       return true;
-    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
+    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
   }
 
   bool hasSameAVL(const VSETVLIInfo &Other) const {
@@ -447,7 +457,8 @@
   // Determine whether the vector instructions requirements represented by
   // Require are compatible with the previous vsetvli instruction represented
   // by this. MI is the instruction whose requirements we're considering.
-  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require) const {
+  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
+                    const MachineRegisterInfo &MRI) const {
     assert(isValid() && Require.isValid() &&
            "Can't compare invalid VSETVLIInfos");
     assert(!Require.SEWLMULRatioOnly &&
@@ -469,7 +480,7 @@
     if (Used.VLAny && !hasSameAVL(Require))
       return false;
 
-    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
+    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;
 
     return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
@@ -881,7 +892,7 @@
     }
   }
 
-  if (CurInfo.isCompatible(Used, Require))
+  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;
 
   // We didn't find a compatible value. If our AVL is a virtual register,
@@ -929,7 +940,7 @@
   // prevent extending live range of an avl register operand.
   // TODO: We can probably relax this for immediates.
   if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
-      PrevInfo.hasEquallyZeroAVL(Info) &&
+      PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
       Info.hasSameVLMAX(PrevInfo)) {
     if (PrevInfo.hasAVLImm())
       Info.setAVLImm(PrevInfo.getAVLImm());
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh,+m -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh,+m -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
 
 ; Test optimizing interleaves to widening arithmetic.
 
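Reviewer note (not part of the patch): the functional change above teaches hasNonZeroAVL() to look through a virtual AVL register to its defining instruction and treat `ADDI rd, x0, imm` with a non-zero immediate as a provably non-zero AVL; hasEquallyZeroAVL() and isCompatible() then thread MachineRegisterInfo through so VL-zeroness-only users (e.g. vmv.s.x) no longer force an extra vsetvli, which is what the test updates below reflect. The following is a minimal standalone sketch of that pattern match under stated assumptions; the helper name isKnownNonZeroAVLReg and the exact include set are illustrative, not code from this patch.

// Sketch only: mirrors the hasNonZeroAVL() logic added in this patch.
// Assumes it lives inside the RISC-V backend so the target headers resolve.
#include "MCTargetDesc/RISCVMCTargetDesc.h" // RISCV::ADDI, RISCV::X0 (assumed available here)
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Returns true when an AVL register is known non-zero at compile time:
// either it is X0 (which selects VLMAX, never zero) or it is a virtual
// register whose definition is `ADDI rd, x0, imm` with imm != 0.
static bool isKnownNonZeroAVLReg(Register AVLReg,
                                 const MachineRegisterInfo &MRI) {
  if (AVLReg == RISCV::X0)
    return true;
  if (const MachineInstr *Def = MRI.getVRegDef(AVLReg))
    return Def->getOpcode() == RISCV::ADDI &&
           Def->getOperand(1).isReg() && Def->getOperand(2).isImm() &&
           Def->getOperand(1).getReg() == RISCV::X0 &&
           Def->getOperand(2).getImm() != 0;
  return false;
}
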
@@ -255,48 +255,56 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, %hi(.LCPI10_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-V128-NEXT: vle32.v v0, (a0) -; RV32-V128-NEXT: vmv8r.v v24, v8 -; RV32-V128-NEXT: vrgather.vv v8, v24, v0 -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-V128-NEXT: vle32.v v24, (a0) ; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV32-V128-NEXT: vle32.v v24, (a0) +; RV32-V128-NEXT: vle32.v v16, (a0) ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, 699051 ; RV32-V128-NEXT: addi a0, a0, -1366 -; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-V128-NEXT: vmv.s.x v0, a0 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-V128-NEXT: vrgather.vv v16, v8, v24 +; RV32-V128-NEXT: addi a0, sp, 16 +; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 ; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV32-V128-NEXT: vmv.v.v v24, v8 +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV32-V128-NEXT: vmv8r.v v8, v0 -; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -306,48 +314,56 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-V128-NEXT: vle32.v v0, (a0) -; RV64-V128-NEXT: vmv8r.v v24, v8 -; RV64-V128-NEXT: vrgather.vv v8, v24, v0 -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV64-V128-NEXT: vle32.v v24, (a0) ; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV64-V128-NEXT: vle32.v v24, (a0) +; RV64-V128-NEXT: vle32.v v16, (a0) ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, 699051 ; RV64-V128-NEXT: addiw a0, a0, -1366 -; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-V128-NEXT: vmv.s.x v0, a0 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV64-V128-NEXT: vrgather.vv v16, v8, v24 +; RV64-V128-NEXT: addi a0, sp, 16 +; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 ; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV64-V128-NEXT: vmv.v.v v24, v8 +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV64-V128-NEXT: vmv8r.v v8, v0 -; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128 +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 ; Test optimizing interleaves to widening arithmetic. @@ -411,48 +411,56 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-V128-NEXT: vle32.v v0, (a0) -; RV32-V128-NEXT: vmv8r.v v24, v8 -; RV32-V128-NEXT: vrgather.vv v8, v24, v0 -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-V128-NEXT: vle32.v v24, (a0) ; RV32-V128-NEXT: lui a0, %hi(.LCPI17_1) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV32-V128-NEXT: vle32.v v24, (a0) +; RV32-V128-NEXT: vle32.v v16, (a0) ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, 699051 ; RV32-V128-NEXT: addi a0, a0, -1366 -; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-V128-NEXT: vmv.s.x v0, a0 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-V128-NEXT: vrgather.vv v16, v8, v24 +; RV32-V128-NEXT: addi a0, sp, 16 +; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 ; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV32-V128-NEXT: vmv.v.v v24, v8 +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl8r.v v8, (a0) # 
Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV32-V128-NEXT: vmv8r.v v8, v0 -; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -462,48 +470,56 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-V128-NEXT: vle32.v v0, (a0) -; RV64-V128-NEXT: vmv8r.v v24, v8 -; RV64-V128-NEXT: vrgather.vv v8, v24, v0 -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV64-V128-NEXT: vle32.v v24, (a0) ; RV64-V128-NEXT: lui a0, %hi(.LCPI17_1) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV64-V128-NEXT: vle32.v v24, (a0) +; RV64-V128-NEXT: vle32.v v16, (a0) ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, 699051 ; RV64-V128-NEXT: addiw a0, a0, -1366 -; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-V128-NEXT: vmv.s.x v0, a0 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV64-V128-NEXT: vrgather.vv v16, v8, v24 +; RV64-V128-NEXT: addi a0, sp, 16 +; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 ; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV64-V128-NEXT: vmv.v.v v24, v8 +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv 
v0, v8, v16 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV64-V128-NEXT: vmv8r.v v8, v0 -; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -132,10 +132,10 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 78 +; RV32-NEXT: li a3, 82 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xce, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 78 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma @@ -150,127 +150,156 @@ ; RV32-NEXT: vs4r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vadd.vi v8, v16, -4 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 13 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a4, a3, 4 +; RV32-NEXT: add a3, a4, a3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs4r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vrgather.vv v4, v24, v8 +; RV32-NEXT: vrgather.vv v12, v24, v8 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 41 +; RV32-NEXT: li a4, 45 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs4r.v v4, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vadd.vi v8, v16, -10 ; RV32-NEXT: lui a3, 12 ; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 29 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a4, a3, 5 +; RV32-NEXT: add a3, a4, a3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v24, 16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 53 +; RV32-NEXT: li a4, 57 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vrgather.vv v4, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 61 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, %hi(.LCPI6_0) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_0) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v24, (a3) -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 69 +; RV32-NEXT: li a4, 41 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, a1, 128 -; RV32-NEXT: vrgather.vv v8, v16, v24 -; RV32-NEXT: lui a3, 
%hi(.LCPI6_1) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_1) -; RV32-NEXT: lui a4, 1 -; RV32-NEXT: addi a4, a4, -64 -; RV32-NEXT: vle32.v v16, (a3) +; RV32-NEXT: vs4r.v v12, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a5, 21 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: slli a4, a3, 6 +; RV32-NEXT: add a3, a4, a3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a1, 128 +; RV32-NEXT: lui a4, %hi(.LCPI6_0) +; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0) +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: lui a5, %hi(.LCPI6_1) +; RV32-NEXT: addi a5, a5, %lo(.LCPI6_1) +; RV32-NEXT: lui a6, 1 +; RV32-NEXT: vle32.v v8, (a4) +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a7, a4, 3 +; RV32-NEXT: add a4, a7, a4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: li a4, 73 +; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 +; RV32-NEXT: vle32.v v8, (a5) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: li a4, 25 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle32.v v8, (a3) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 49 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, a6, -64 +; RV32-NEXT: vmv.s.x v24, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 3 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v24, v16, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v2, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v24, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vslideup.vi v4, v8, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 41 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v8, v24, 0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 41 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; 
RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vi v8, v24, -2 +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vi v16, v12, -2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 53 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v4, v16, v8 -; RV32-NEXT: vadd.vi v8, v24, -8 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v16 +; RV32-NEXT: vadd.vi v16, v12, -8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 61 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v4, v16, v8, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu @@ -279,42 +308,37 @@ ; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 25 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 69 +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v24, v8, v16 +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 25 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV32-NEXT: vrgather.vv v24, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vslideup.vi v4, v8, 0 +; RV32-NEXT: vslideup.vi v4, v24, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 25 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size 
Folded Spill @@ -323,12 +347,12 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 53 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v0, v8 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v12, v16, v8 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 @@ -337,87 +361,104 @@ ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vi v8, v8, -6 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 61 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv.v.v v4, v12 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: lui a3, %hi(.LCPI6_6) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) ; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v8, (a3) +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 960 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 69 +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v24, v16 -; RV32-NEXT: lui a1, %hi(.LCPI6_6) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_6) -; RV32-NEXT: li a3, 960 -; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vslideup.vi v4, v8, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded 
Spill +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v8, 0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 53 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v12, v24, v8 -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 61 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v12, v24, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill @@ -429,38 +470,41 @@ ; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 69 +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v0, v24 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vslideup.vi v12, v8, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill @@ -471,49 +515,61 @@ ; RV32-NEXT: lui a1, 15 ; RV32-NEXT: vmv.s.x v1, a1 ; RV32-NEXT: csrr a1, vlenb -; 
RV32-NEXT: li a3, 53 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v4, v16, v12 ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 61 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v4, v16, v8, v0.t ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: lui a3, %hi(.LCPI6_12) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) +; RV32-NEXT: vle32.v v24, (a1) +; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 69 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16 -; RV32-NEXT: lui a1, %hi(.LCPI6_12) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_12) -; RV32-NEXT: li a3, 1008 -; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v2, a3 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 1008 +; RV32-NEXT: vmv.s.x v2, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 73 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v16, v24 ; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 57 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vslideup.vi v4, v8, 0 @@ -523,20 +579,20 @@ ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 61 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -546,38 +602,38 @@ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a2, %hi(.LCPI6_15) ; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15) -; RV32-NEXT: vle32.v v24, (a1) +; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: vle32.v v8, (a2) 
; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 61 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a2, a1, 6 +; RV32-NEXT: add a1, a2, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 69 +; RV32-NEXT: li a2, 73 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v16 ; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 45 +; RV32-NEXT: li a2, 49 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 61 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a2, a1, 6 +; RV32-NEXT: add a1, a2, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 41 +; RV32-NEXT: li a2, 45 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -590,37 +646,37 @@ ; RV32-NEXT: vse32.v v4, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a3, a2, 5 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a3, a2, 2 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a3, a2, 4 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 25 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 21 +; RV32-NEXT: li a2, 41 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 78 +; RV32-NEXT: li a1, 82 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12507,7 +12507,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -12521,7 +12520,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 @@ -12543,7 +12541,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 @@ -12570,7 +12567,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 9 @@ -12584,7 +12580,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10 @@ -12600,7 +12595,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11 @@ -12614,7 +12608,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12 @@ -12628,7 +12621,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13 @@ -12653,7 +12645,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17 @@ -12667,7 +12658,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18 @@ -12689,7 +12679,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21 @@ -12716,7 +12705,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25 @@ -12730,7 +12718,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26 @@ -12752,7 +12739,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: 
li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29 @@ -12766,7 +12752,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30 @@ -12781,7 +12766,6 @@ ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: li a1, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31 @@ -12795,7 +12779,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3 @@ -12819,7 +12802,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6 @@ -12832,7 +12814,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7 @@ -12856,7 +12837,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14 @@ -12869,7 +12849,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15 @@ -12895,7 +12874,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19 @@ -12919,7 +12897,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22 @@ -12932,7 +12909,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23 @@ -12958,7 +12934,6 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -1296,12 +1296,10 @@ ; RV32-NEXT: vmv1r.v v16, v0 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: lui a3, 341 +; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: lui a1, 341 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmerge.vxm v24, v24, a0, v0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v16 @@ -1324,12 +1322,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: lui a3, 341 +; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: lui a1, 341 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmerge.vxm v16, v16, a0, v0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -407,7 +407,7 @@ ; RV32-NEXT: lui a0, %hi(.LCPI23_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI23_0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV32-NEXT: vle32.v v0, (a0) ; RV32-NEXT: vmv4r.v v24, v12 ; RV32-NEXT: vmv4r.v v16, v8 @@ -416,9 +416,7 @@ ; RV32-NEXT: vrsub.vi v16, v16, 15 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV32-NEXT: ret ; @@ -427,7 +425,7 @@ ; RV64-NEXT: lui a0, %hi(.LCPI23_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI23_0) ; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV64-NEXT: vle32.v v0, (a0) ; RV64-NEXT: vmv4r.v v24, v12 ; RV64-NEXT: vmv4r.v v16, v8 @@ -436,9 +434,7 @@ ; RV64-NEXT: vrsub.vi v16, v16, 15 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV64-NEXT: ret %v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32>