diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -39,6 +39,7 @@
   RISCVRedundantCopyElimination.cpp
   RISCVRegisterBankInfo.cpp
   RISCVRegisterInfo.cpp
+  RISCVRVVInitUndef.cpp
   RISCVSExtWRemoval.cpp
   RISCVSubtarget.cpp
   RISCVTargetMachine.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -59,6 +59,9 @@
 FunctionPass *createRISCVInsertVSETVLIPass();
 void initializeRISCVInsertVSETVLIPass(PassRegistry &);
 
+FunctionPass *createRISCVInitUndefPass();
+void initializeRISCVInitUndefPass(PassRegistry &);
+
 FunctionPass *createRISCVRedundantCopyEliminationPass();
 void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
new file
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
@@ -0,0 +1,201 @@
+//===- RISCVRVVInitUndef.cpp - Initialize undef vector values to zero ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that initializes undef vector values
+// to zero, so that register allocation cannot produce an assignment that
+// violates the register-overlap constraints of vector instructions.
+//
+// Certain RISC-V vector instructions require that the destination register
+// group not overlap a source register group; executing a violating encoding
+// raises an illegal-instruction trap. We model this constraint with
+// early-clobber operands, but early clobber cannot stop the register
+// allocator from choosing the same or an overlapping register when the
+// source operand is an undef value. Converting such IMPLICIT_DEFs into
+// explicit zero initialization prevents that. This is not the ideal fix: it
+// may emit redundant zero-initialization instructions for values that really
+// are undef, and the constraint should eventually be modeled precisely.
+// Until it is, this pass is the only way to avoid the violation.
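+//
+// As a schematic illustration of the rewrite (the register names and the
+// widening operation below are placeholders and the operand lists are elided,
+// so this is a sketch rather than MIR taken from a real test), the pass turns
+//
+//   %undef:vr = IMPLICIT_DEF
+//   early-clobber %dst:vrm2 = <widening vector op> ..., %undef, ...
+//
+// into
+//
+//   %undef:vr = PseudoVMV_V_I_M1 0, /*AVL=VLMAX*/ -1, /*SEW*/ 4
+//   early-clobber %dst:vrm2 = <widening vector op> ..., %undef, ...
+//
+// so that %undef has a real definition and the early-clobber constraint on
+// %dst is actually enforced during register allocation.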
+//
+// See also: https://github.com/llvm/llvm-project/issues/50157
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-init-undef"
+#define RISCV_INIT_UNDEF_NAME "RISCV init undef pass"
+
+namespace {
+
+class RISCVInitUndef : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
+public:
+  static char ID;
+
+  RISCVInitUndef() : MachineFunctionPass(ID) {
+    initializeRISCVInitUndefPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_INIT_UNDEF_NAME; }
+
+private:
+  bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+  bool handleImplicitDef(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &Inst);
+  bool isVectorRegClass(const Register &R);
+};
+
+} // end anonymous namespace
+
+char RISCVInitUndef::ID = 0;
+
+INITIALIZE_PASS(RISCVInitUndef, DEBUG_TYPE, RISCV_INIT_UNDEF_NAME, false, false)
+
+bool RISCVInitUndef::isVectorRegClass(const Register &R) {
+  unsigned RegClassID = MRI->getRegClass(R)->getID();
+  switch (RegClassID) {
+  case RISCV::VRRegClassID:
+  case RISCV::VRM2RegClassID:
+  case RISCV::VRM4RegClassID:
+  case RISCV::VRM8RegClassID:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool RISCVInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator &Inst) {
+  MachineInstr &MI = *Inst;
+  const TargetRegisterInfo &TRI =
+      *MBB.getParent()->getSubtarget().getRegisterInfo();
+
+  assert(MI.getOpcode() == TargetOpcode::IMPLICIT_DEF);
+  // Every vector register feeding an instruction with an overlap constraint
+  // must be explicitly defined, otherwise the constraint can be violated.
+  unsigned Reg = MI.getOperand(0).getReg();
+  // Scan the users of this implicit-def to decide whether the register needs
+  // an explicit zero initialization.
+  bool NeedZeroInit = false;
+
+  if (Register::isVirtualRegister(Reg)) {
+    // For virtual registers, mark all uses as undef and check whether any
+    // user requires the input to be explicitly initialized.
+    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+      MO.setIsUndef();
+      MachineInstr *UserMI = MO.getParent();
+
+      if (UserMI->getOpcode() == TargetOpcode::INSERT_SUBREG) {
+        // FIXME: Zero-initializing only the subregisters that the insert does
+        // not cover should be enough.
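+        // Note: an INSERT_SUBREG keeps the lanes of the undef input that it
+        // does not overwrite live into its result, so the whole register is
+        // conservatively zero-initialized for now.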
+        NeedZeroInit = true;
+        break;
+      }
+
+      bool HasEarlyClobber = false;
+      bool TiedToDef = false;
+      for (MachineOperand &UseMO : UserMI->operands()) {
+        if (!UseMO.isReg())
+          continue;
+        if (UseMO.isEarlyClobber()) {
+          HasEarlyClobber = true;
+        } else if (UseMO.isUse() && UseMO.isTied() &&
+                   TRI.regsOverlap(UseMO.getReg(), Reg)) {
+          TiedToDef = true;
+        }
+      }
+      if (HasEarlyClobber && !TiedToDef)
+        NeedZeroInit = true;
+    }
+  }
+
+  if (!NeedZeroInit)
+    return false;
+
+  LLVM_DEBUG(
+      dbgs()
+      << "Emitting vmv.v.i vd, 0 with VLMAX for implicit vector register "
+      << Reg << '\n');
+
+  unsigned Opcode;
+  unsigned RegClassID = MRI->getRegClass(Reg)->getID();
+  switch (RegClassID) {
+  case RISCV::VRRegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M1;
+    break;
+  case RISCV::VRM2RegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M2;
+    break;
+  case RISCV::VRM4RegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M4;
+    break;
+  case RISCV::VRM8RegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M8;
+    break;
+  default:
+    llvm_unreachable("Unexpected register class?");
+  }
+
+  BuildMI(MBB, Inst, MI.getDebugLoc(), TII->get(Opcode), Reg)
+      .addImm(0)
+      .addImm(/* AVL=VLMAX */ -1)
+      .addImm(/* SEW */ 4);
+
+  Inst = MBB.erase(Inst); // Remove the IMPLICIT_DEF instruction.
+
+  // We want to leave Inst pointing to the previous instruction, but what if
+  // we just erased the first instruction?
+  if (Inst == MBB.begin()) {
+    LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
+    Inst = BuildMI(MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
+  } else
+    --Inst;
+
+  return true;
+}
+
+bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
+                                       MachineBasicBlock &MBB) {
+  bool Changed = false;
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+    MachineInstr &MI = *I;
+    if (MI.isImplicitDef()) {
+      auto DstReg = MI.getOperand(0).getReg();
+      if (isVectorRegClass(DstReg))
+        Changed |= handleImplicitDef(MBB, I);
+    }
+  }
+  return Changed;
+}
+
+bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) {
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasVInstructions())
+    return false;
+
+  MRI = &MF.getRegInfo();
+  TII = ST.getInstrInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &BB : MF)
+    Changed |= processBasicBlock(MF, BB);
+
+  return Changed;
+}
+
+FunctionPass *llvm::createRISCVInitUndefPass() { return new RISCVInitUndef(); }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -53,6 +53,7 @@
   initializeRISCVSExtWRemovalPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
+  initializeRISCVInitUndefPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -253,6 +254,9 @@
 void RISCVPassConfig::addPreRegAlloc() {
   if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createRISCVMergeBaseOffsetOptPass());
+
+  if (getOptimizeRegAlloc())
+    addPass(createRISCVInitUndefPass());
   addPass(createRISCVInsertVSETVLIPass());
 }
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -96,6 +96,7 @@
 ; CHECK-NEXT: Remove dead machine instructions
 ; RV64-NEXT: RISCV sext.w Removal
 ; CHECK-NEXT: RISCV Merge Base Offset
+; CHECK-NEXT: RISCV init undef pass
 ; CHECK-NEXT: RISCV Insert VSETVLI pass
 ; CHECK-NEXT: Detect Dead Lanes
 ; CHECK-NEXT: Process Implicit Definitions
diff --git
a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll --- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -24,6 +24,8 @@ ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: li a0, 55 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v8, (a0), v8 ; CHECK-NEXT: csrr a0, vlenb @@ -35,9 +37,11 @@ ; CHECK-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: li s0, 36 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, s0, e16, m4, tu, mu ; CHECK-NEXT: vfwadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb @@ -47,6 +51,9 @@ ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: call func@plt ; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu ; CHECK-NEXT: vrgather.vv v4, v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, mu @@ -100,6 +107,8 @@ ; SUBREGLIVENESS-NEXT: slli a0, a0, 4 ; SUBREGLIVENESS-NEXT: sub sp, sp, a0 ; SUBREGLIVENESS-NEXT: li a0, 55 +; SUBREGLIVENESS-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v8, (a0), v8 ; SUBREGLIVENESS-NEXT: csrr a0, vlenb @@ -111,15 +120,20 @@ ; SUBREGLIVENESS-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: add a0, a0, a1 ; SUBREGLIVENESS-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill -; SUBREGLIVENESS-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; SUBREGLIVENESS-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; SUBREGLIVENESS-NEXT: vmclr.m v0 ; SUBREGLIVENESS-NEXT: li s0, 36 +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 ; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, tu, mu ; SUBREGLIVENESS-NEXT: vfwadd.vv v8, v8, v8, v0.t ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 ; SUBREGLIVENESS-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: call func@plt ; SUBREGLIVENESS-NEXT: li a0, 32 +; SUBREGLIVENESS-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, tu, mu ; SUBREGLIVENESS-NEXT: vrgather.vv v16, v8, v8, v0.t ; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -472,6 +472,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v11, v10, a0 ; CHECK-NEXT: vslidedown.vx v8, v9, a0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vi v9, v11, 0 ; CHECK-NEXT: add a1, a0, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll @@ -111,8 +111,10 @@ ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a0, a1, .LBB7_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 @@ -122,6 +124,8 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu ; CHECK-NEXT: vslideup.vi v16, v8, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -70,8 +70,10 @@ ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a0, a1, .LBB4_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 64 @@ -81,6 +83,8 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: li a0, 128 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu ; CHECK-NEXT: vslideup.vx v16, v8, a1 @@ -278,7 +282,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v0, v28, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v16, (a3) +; CHECK-NEXT: vle64.v v8, (a3) ; CHECK-NEXT: addi t0, a5, -16 ; CHECK-NEXT: addi a6, a1, 512 ; CHECK-NEXT: mv a3, a2 @@ -286,10 +290,10 @@ ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a3, t0 ; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vle64.v v16, (a6) ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, mu ; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: vncvt.x.x.w v24, v16, v0.t +; CHECK-NEXT: vncvt.x.x.w v24, v8, v0.t ; CHECK-NEXT: csrr a6, vlenb ; CHECK-NEXT: slli a6, a6, 4 ; CHECK-NEXT: add a6, sp, a6 @@ -302,13 +306,13 @@ ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu ; CHECK-NEXT: li a5, 64 ; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t ; CHECK-NEXT: csrr a6, vlenb ; CHECK-NEXT: li t0, 48 ; CHECK-NEXT: mul a6, a6, t0 ; CHECK-NEXT: add a6, sp, a6 ; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a7, a5, .LBB16_10 ; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: li a7, 64 @@ -362,7 +366,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v0, v2, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v24, (t0) +; CHECK-NEXT: vle64.v v8, (t0) ; CHECK-NEXT: addi t0, a4, -16 ; CHECK-NEXT: addi a6, a1, 256 ; CHECK-NEXT: mv a1, a2 @@ -370,18 +374,26 @@ ; CHECK-NEXT: # %bb.19: ; CHECK-NEXT: mv a1, t0 ; CHECK-NEXT: .LBB16_20: -; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vle64.v v24, (a6) ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; 
CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a3, .LBB16_22 ; CHECK-NEXT: # %bb.21: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB16_22: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vncvt.x.x.w v24, v8, v0.t +; CHECK-NEXT: vncvt.x.x.w v8, v24, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a7, a5, .LBB16_24 ; CHECK-NEXT: # %bb.23: ; CHECK-NEXT: li a7, 32 @@ -399,53 +411,55 @@ ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 40 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 40 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v24, v8, 16 +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v16, 16 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v24, v16, v0.t ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a7, a3, .LBB16_28 ; CHECK-NEXT: # %bb.27: ; CHECK-NEXT: li a7, 16 @@ -456,18 +470,20 @@ ; CHECK-NEXT: slli a1, a1, 5 ; 
CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v0, 0 ; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, mu ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: vse32.v v16, (a0) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: li a3, 40 @@ -516,8 +532,10 @@ ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB17_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a0, a1, .LBB17_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 @@ -527,6 +545,8 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu ; CHECK-NEXT: vslideup.vi v16, v8, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -1787,10 +1787,14 @@ ; LMULMAX4-NEXT: vncvt.x.x.w v16, v12 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v12, v16 -; LMULMAX4-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX4-NEXT: vmv.v.i v14, 0 +; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v14, v8 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v8, v14 +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX4-NEXT: vmv.v.i v10, 0 ; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, tu, mu ; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 ; LMULMAX4-NEXT: vse16.v v8, (a0) @@ -1836,7 +1840,12 @@ ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v16, v12 +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX4-NEXT: vmv.v.i v12, 0 +; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v12, v8 +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX4-NEXT: vmv.v.i v8, 0 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, tu, mu ; LMULMAX4-NEXT: vslideup.vi v12, v16, 8 ; LMULMAX4-NEXT: vse32.v v12, (a0) @@ -2136,7 +2145,9 @@ ; CHECK-NEXT: vfncvt.rod.f.f.w v24, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v8, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vfncvt.rod.f.f.w v12, v16 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: 
vfncvt.f.f.w v10, v12 @@ -2152,6 +2163,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v24, v8 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v28, v16 ; CHECK-NEXT: vs8r.v v24, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -39,6 +39,9 @@ ; RV32-V128-LABEL: interleave_v2f64: ; RV32-V128: # %bb.0: ; RV32-V128-NEXT: vmv1r.v v12, v9 +; RV32-V128-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV32-V128-NEXT: vmv.v.i v10, 0 +; RV32-V128-NEXT: vmv.v.i v10, 0 ; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; RV32-V128-NEXT: vid.v v9 ; RV32-V128-NEXT: vsrl.vi v9, v9, 1 @@ -53,6 +56,9 @@ ; RV64-V128-LABEL: interleave_v2f64: ; RV64-V128: # %bb.0: ; RV64-V128-NEXT: vmv1r.v v12, v9 +; RV64-V128-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64-V128-NEXT: vmv.v.i v10, 0 +; RV64-V128-NEXT: vmv.v.i v10, 0 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-V128-NEXT: vid.v v10 ; RV64-V128-NEXT: vsrl.vi v14, v10, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -14,6 +14,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, mu ; CHECK-NEXT: vslideup.vi v8, v12, 0 ; CHECK-NEXT: ret @@ -27,6 +29,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, mu ; CHECK-NEXT: vslideup.vi v8, v12, 2 ; CHECK-NEXT: ret @@ -40,6 +44,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, mu ; CHECK-NEXT: vslideup.vi v8, v12, 6 ; CHECK-NEXT: ret @@ -53,20 +59,27 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v12, (a0) +; LMULMAX2-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX2-NEXT: vmv.v.i v16, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m4, tu, mu ; LMULMAX2-NEXT: vslideup.vi v8, v12, 0 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0: ; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX1-NEXT: vle32.v v12, (a1) +; LMULMAX1-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v16, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vle32.v v16, (a0) +; LMULMAX1-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v20, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v12, 0 +; LMULMAX1-NEXT: vslideup.vi v8, v16, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v16, 4 +; 
LMULMAX1-NEXT: vslideup.vi v8, v12, 4 ; LMULMAX1-NEXT: ret %sv = load <8 x i32>, <8 x i32>* %svp %v = call @llvm.vector.insert.v8i32.nxv8i32( %vec, <8 x i32> %sv, i64 0) @@ -78,20 +91,27 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v12, (a0) +; LMULMAX2-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX2-NEXT: vmv.v.i v16, 0 ; LMULMAX2-NEXT: vsetivli zero, 16, e32, m4, tu, mu ; LMULMAX2-NEXT: vslideup.vi v8, v12, 8 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8: ; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX1-NEXT: vle32.v v12, (a1) +; LMULMAX1-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v16, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vle32.v v16, (a0) +; LMULMAX1-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v20, 0 ; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v12, 8 +; LMULMAX1-NEXT: vslideup.vi v8, v16, 8 ; LMULMAX1-NEXT: vsetivli zero, 16, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v16, 12 +; LMULMAX1-NEXT: vslideup.vi v8, v12, 12 ; LMULMAX1-NEXT: ret %sv = load <8 x i32>, <8 x i32>* %svp %v = call @llvm.vector.insert.v8i32.nxv8i32( %vec, <8 x i32> %sv, i64 8) @@ -103,6 +123,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: ret %sv = load <2 x i32>, <2 x i32>* %svp %v = call @llvm.vector.insert.v2i32.nxv8i32( undef, <2 x i32> %sv, i64 0) @@ -165,6 +187,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v10, (a0) ; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, tu, mu @@ -196,6 +220,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v10, (a0) ; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, mu @@ -226,6 +252,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v10, (a0) ; LMULMAX2-NEXT: vsetvli zero, zero, e32, m2, tu, mu @@ -256,6 +284,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, tu, mu ; LMULMAX2-NEXT: vslideup.vi v10, v8, 6 ; LMULMAX2-NEXT: vse32.v v10, (a0) @@ -513,7 +543,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, mu ; CHECK-NEXT: vslideup.vi 
v8, v16, 4 ; CHECK-NEXT: vs8r.v v8, (a2) @@ -531,6 +566,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vs8r.v v8, (a1) ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv @@ -544,6 +581,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, mu ; CHECK-NEXT: vslideup.vi v16, v8, 2 ; CHECK-NEXT: vs8r.v v16, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -43,13 +43,17 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetvli a3, zero, e16, m2, ta, mu +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: lw a3, 16(a0) ; RV32-NEXT: addi a4, a0, 20 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vlse32.v v10, (a4), zero ; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; RV32-NEXT: vmv.s.x v10, a3 -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu +; RV32-NEXT: vsetvli a3, zero, e16, m2, ta, mu +; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu ; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, mu ; RV32-NEXT: vmv.v.i v10, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -52,6 +52,9 @@ ; RV32-V128-LABEL: interleave_v2i64: ; RV32-V128: # %bb.0: ; RV32-V128-NEXT: vmv1r.v v12, v9 +; RV32-V128-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV32-V128-NEXT: vmv.v.i v10, 0 +; RV32-V128-NEXT: vmv.v.i v10, 0 ; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; RV32-V128-NEXT: vid.v v9 ; RV32-V128-NEXT: vsrl.vi v9, v9, 1 @@ -66,6 +69,9 @@ ; RV64-V128-LABEL: interleave_v2i64: ; RV64-V128: # %bb.0: ; RV64-V128-NEXT: vmv1r.v v12, v9 +; RV64-V128-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64-V128-NEXT: vmv.v.i v10, 0 +; RV64-V128-NEXT: vmv.v.i v10, 0 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-V128-NEXT: vid.v v10 ; RV64-V128-NEXT: vsrl.vi v14, v10, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12661,6 +12661,8 @@ ; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-NEXT: vmv1r.v v12, v10 ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t +; RV64V-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; RV64V-NEXT: vmv.v.i v14, 0 ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, mu ; RV64V-NEXT: vslidedown.vi v10, v10, 16 ; RV64V-NEXT: vslidedown.vi v8, v8, 16 @@ -12670,6 +12672,8 @@ ; RV64V-NEXT: vslidedown.vi v0, v0, 2 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64V-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64V-NEXT: vmv.v.i v8, 0 ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, tu, mu ; RV64V-NEXT: vslideup.vi v12, v10, 16 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -300,7 +300,9 @@ ; RV64-NEXT: vslidedown.vi v0, v10, 2 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, mu ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vmv.v.i v14, 0 ; RV64-NEXT: bltu a1, a2, .LBB13_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 @@ -310,6 +312,8 @@ ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: li a0, 32 ; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, mu ; RV64-NEXT: vslideup.vi v8, v12, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -367,6 +367,8 @@ define @insert_nxv32f16_undef_nxv1f16_0( %subvec) { ; CHECK-LABEL: insert_nxv32f16_undef_nxv1f16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv1f16.nxv32f16( undef, %subvec, i64 0) ret %v @@ -381,6 +383,8 @@ ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vx v14, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv1f16.nxv32f16( undef, %subvec, i64 26) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -2213,6 +2213,8 @@ ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vmv.v.i v12, 0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, mu @@ -2237,6 +2239,8 @@ ; RV32-NEXT: vsext.vf4 v16, v8 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu ; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, mu @@ -2255,6 +2259,8 @@ ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; RV64-NEXT: vmv.v.i v20, 0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a2, a1, 3 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -502,15 +502,17 @@ ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, mu ; RV32-BITS-UNKNOWN-NEXT: vid.v v8 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 -; 
RV32-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v24, v8, 1, v0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v24, v16 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v28, v16 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; @@ -536,15 +538,17 @@ ; RV32-BITS-512-NEXT: addi a0, a0, -1 ; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, mu ; RV32-BITS-512-NEXT: vid.v v8 -; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-512-NEXT: vrsub.vx v16, v8, a0 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV32-BITS-512-NEXT: vmv.v.i v16, 0 -; RV32-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 +; RV32-BITS-512-NEXT: vmv.v.i v8, 0 +; RV32-BITS-512-NEXT: vmerge.vim v24, v8, 1, v0 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; RV32-BITS-512-NEXT: vrgather.vv v28, v16, v8 -; RV32-BITS-512-NEXT: vrgather.vv v24, v20, v8 +; RV32-BITS-512-NEXT: vrgather.vv v12, v24, v16 +; RV32-BITS-512-NEXT: vrgather.vv v8, v28, v16 +; RV32-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV32-BITS-512-NEXT: vmv.v.i v16, 0 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV32-BITS-512-NEXT: vand.vi v8, v24, 1 +; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-512-NEXT: ret ; @@ -555,15 +559,17 @@ ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, mu ; RV64-BITS-UNKNOWN-NEXT: vid.v v8 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 -; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v24, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v24, v16 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v28, v16 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; @@ -589,15 +595,17 @@ ; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, mu ; RV64-BITS-512-NEXT: vid.v v8 -; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-512-NEXT: vrsub.vx v16, v8, a0 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV64-BITS-512-NEXT: vmv.v.i v16, 0 -; RV64-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 +; RV64-BITS-512-NEXT: vmv.v.i v8, 0 +; RV64-BITS-512-NEXT: vmerge.vim v24, v8, 1, v0 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; RV64-BITS-512-NEXT: vrgather.vv v28, v16, v8 -; RV64-BITS-512-NEXT: vrgather.vv v24, v20, v8 +; RV64-BITS-512-NEXT: vrgather.vv v12, v24, 
v16 +; RV64-BITS-512-NEXT: vrgather.vv v8, v28, v16 +; RV64-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV64-BITS-512-NEXT: vmv.v.i v16, 0 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; RV64-BITS-512-NEXT: vand.vi v8, v24, 1 +; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-512-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv64i1( %a) @@ -1082,7 +1090,9 @@ ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, mu ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24 -; RV32-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv64i8: @@ -1107,6 +1117,8 @@ ; RV32-BITS-512-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV32-BITS-512-NEXT: vrgather.vv v16, v12, v24 +; RV32-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmv8r.v v8, v16 ; RV32-BITS-512-NEXT: ret ; @@ -1121,7 +1133,9 @@ ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, mu ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24 -; RV64-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv64i8: @@ -1146,6 +1160,8 @@ ; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0 ; RV64-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV64-BITS-512-NEXT: vrgather.vv v16, v12, v24 +; RV64-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmv8r.v v8, v16 ; RV64-BITS-512-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv64i8( %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -114,6 +114,8 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fptrunc.nxv16f64.nxv16f32( %a, %m, i32 %vl) @@ -131,10 +133,7 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a4, a1, 1 @@ -162,31 +161,30 @@ ; CHECK-NEXT: .LBB8_6: ; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vx v1, v24, a7 +; CHECK-NEXT: vslidedown.vx v25, v24, a7 ; CHECK-NEXT: add a7, a0, t0 ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu -; CHECK-NEXT: sub a4, a2, a4 ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: slli a5, a5, 3 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8re8.v v16, (a5) 
# Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a5, zero, e16, m8, ta, mu +; CHECK-NEXT: sub a4, a2, a4 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: bltu a2, a4, .LBB8_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: mv a6, a4 ; CHECK-NEXT: .LBB8_8: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu -; CHECK-NEXT: vl8re64.v v16, (a7) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v8, (a7) ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: sub a4, a6, a1 -; CHECK-NEXT: vslidedown.vx v0, v1, a3 +; CHECK-NEXT: vslidedown.vx v0, v25, a3 ; CHECK-NEXT: bltu a6, a4, .LBB8_10 ; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: mv a2, a4 @@ -195,21 +193,23 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t +; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t ; CHECK-NEXT: bltu a6, a1, .LBB8_12 ; CHECK-NEXT: # %bb.11: ; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: .LBB8_12: ; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -281,6 +281,8 @@ ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; RV32-NEXT: vmv.v.i v12, 0 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv32i8: @@ -288,7 +290,7 @@ ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: slli a5, a3, 1 ; RV64-NEXT: sub a6, a1, a5 -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: li a4, 0 ; RV64-NEXT: li a2, 0 ; RV64-NEXT: bltu a1, a6, .LBB12_2 @@ -303,10 +305,10 @@ ; RV64-NEXT: .LBB12_4: ; RV64-NEXT: srli a6, a3, 2 ; RV64-NEXT: vsetvli t0, zero, e8, mf2, ta, mu -; RV64-NEXT: vslidedown.vx v13, v12, a6 +; RV64-NEXT: vslidedown.vx v12, v13, a6 ; RV64-NEXT: srli a6, a3, 3 ; RV64-NEXT: vsetvli t0, zero, e8, mf4, ta, mu -; RV64-NEXT: vslidedown.vx v0, v13, a6 +; RV64-NEXT: vslidedown.vx v0, v12, a6 ; RV64-NEXT: vsetvli t0, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v16, v11 ; RV64-NEXT: vsetvli zero, a7, e8, m1, ta, mu @@ -321,7 +323,7 @@ ; RV64-NEXT: mv a4, a5 ; RV64-NEXT: .LBB12_8: ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu -; RV64-NEXT: vslidedown.vx v0, v12, a6 +; RV64-NEXT: vslidedown.vx v0, v13, a6 ; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu ; 
RV64-NEXT: vsext.vf8 v16, v9 ; RV64-NEXT: vsetvli zero, a4, e8, m1, ta, mu @@ -333,8 +335,10 @@ ; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: bltu a2, a3, .LBB12_12 ; RV64-NEXT: # %bb.11: ; RV64-NEXT: mv a2, a3 @@ -342,7 +346,7 @@ ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, i8* %base, %idxs diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -181,6 +181,8 @@ ; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.trunc.nxv15i16.nxv15i64( %a, %m, i32 %vl) @@ -241,6 +243,8 @@ ; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu ; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.trunc.nxv32i7.nxv32i32( %a, %m, i32 %vl) @@ -277,6 +281,8 @@ ; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu ; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.trunc.nxv32i8.nxv32i32( %a, %m, i32 %vl) @@ -294,10 +300,7 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a4, a1, 1 @@ -325,31 +328,30 @@ ; CHECK-NEXT: .LBB17_6: ; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vx v1, v24, a7 +; CHECK-NEXT: vslidedown.vx v25, v24, a7 ; CHECK-NEXT: add a7, a0, t0 ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu -; CHECK-NEXT: sub a4, a2, a4 ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: slli a5, a5, 3 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a5, zero, e16, m8, ta, mu +; CHECK-NEXT: sub a4, a2, a4 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: bltu a2, a4, .LBB17_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: mv a6, a4 ; CHECK-NEXT: .LBB17_8: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu -; CHECK-NEXT: vl8re64.v v16, (a7) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; 
CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v8, (a7) ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: sub a4, a6, a1 -; CHECK-NEXT: vslidedown.vx v0, v1, a3 +; CHECK-NEXT: vslidedown.vx v0, v25, a3 ; CHECK-NEXT: bltu a6, a4, .LBB17_10 ; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: mv a2, a4 @@ -358,21 +360,23 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vncvt.x.x.w v20, v24, v0.t +; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t ; CHECK-NEXT: bltu a6, a1, .LBB17_12 ; CHECK-NEXT: # %bb.11: ; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: .LBB17_12: ; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0