Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -110,6 +110,8 @@ return false; } + uint8_t getSEW() const { return SEW; } + bool hasSameAVL(const VSETVLIInfo &Other) const { assert(isValid() && Other.isValid() && "Can't compare invalid VSETVLIInfos"); @@ -193,6 +195,10 @@ return getSEWLMULRatio(SEW, VLMul); } + RISCVII::VLMUL getVLMUL() const { + return VLMul; + } + // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX. bool hasSameVLMAX(const VSETVLIInfo &Other) const { assert(isValid() && Other.isValid() && @@ -306,6 +312,25 @@ return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul); } + void dump() { + dbgs() << (isValid() ? "valid" : "invalid") << ", " + << (isUnknown() ? "unknown" : "known") << "\n"; + + if (hasAVLReg()) + dbgs() << "AVLReg=" << AVLReg << ", "; + if (hasAVLImm()) + dbgs() << "AVLImm=" << AVLImm << ", "; + + dbgs() << "VLMul=" << (unsigned)VLMul << ", " + << "SEW=" << (unsigned)SEW << ", " + << "TailAgnostic=" << (bool)TailAgnostic << ", " + << "MaskAgnostic=" << (bool)MaskAgnostic << ", " + << "MaskRegOp=" << (bool)MaskRegOp << ", " + << "StoreOp=" << (bool)StoreOp << ", " + << "ScalarMovOp=" << (bool)ScalarMovOp << ", " + << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "\n"; + } + bool operator==(const VSETVLIInfo &Other) const { // Uninitialized is only equal to another Uninitialized. 
if (!isValid()) @@ -438,6 +463,7 @@ bool computeVLVTYPEChanges(const MachineBasicBlock &MBB); void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); + void doPRE(MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -1021,7 +1047,9 @@ } void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { - VSETVLIInfo CurInfo; + LLVM_DEBUG(dbgs() << "Inserting VSETVLIs in " << MBB.getName() << "\n"); + + VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred; // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI. MachineInstr *PrevVSETVLIMI = nullptr; @@ -1059,7 +1087,9 @@ MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, /*isImp*/ true)); + assert(CurInfo.isValid()); if (!CurInfo.isValid()) { + // FIXME: We should just record the entry at top of loop, and merge this code. // We haven't found any vector instructions or VL/VTYPE changes yet, // use the predecessor information. assert(BlockInfo[MBB.getNumber()].Pred.isValid() && @@ -1122,13 +1152,83 @@ // expected info, insert a vsetvli to correct. if (MI.isTerminator()) { const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; - if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() && + if (ExitInfo.isValid() && !ExitInfo.isUnknown() && CurInfo != ExitInfo) { insertVSETVLI(MBB, MI, ExitInfo, CurInfo); CurInfo = ExitInfo; } } } + + if (MBB.canFallThrough()) { + // TODO: use iterators and insert at end! + const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; + if (ExitInfo.isValid() && !ExitInfo.isUnknown() && + CurInfo != ExitInfo) { + insertVSETVLI(MBB, *MBB.getLastNonDebugInstr(), ExitInfo, CurInfo); + CurInfo = ExitInfo; + } + } +} + + +/// Return true if the VL value configured must be equal to the requested one.
+static bool hasFixedResult(VSETVLIInfo Info, const RISCVSubtarget &ST) { + if (!Info.hasAVLImm()) + return false; + + if (RISCVII::LMUL_1 != Info.getVLMUL()) + return false; + + unsigned AVL = Info.getAVLImm(); + unsigned SEW = Info.getSEW(); + unsigned AVLInBits = AVL * SEW; + return ST.getMinRVVVectorSizeInBits() >= AVLInBits; +} + +/// Perform simple partial redundancy elimination of the VSETVL instructions +/// we're about to insert by looking for cases where we can PRE from the +/// beginning of one block to the end of one of its predecessors. Specifically, +/// this is geared to catch the common case of a fixed length vsetvl in a single +/// block loop when it could execute once in the preheader instead. +void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + + MachineBasicBlock *UnavailablePred = nullptr; + MachineBasicBlock *OtherPred = nullptr; + for (MachineBasicBlock *P : MBB.predecessors()) { + if (BlockInfo[P->getNumber()].Exit.isUnknown()) { + if (UnavailablePred) return; + UnavailablePred = P; + } else if (!OtherPred) + OtherPred = P; + else if (BlockInfo[OtherPred->getNumber()].Exit != BlockInfo[P->getNumber()].Exit) + return; + } + + // unreachable or single pred block + if (!UnavailablePred || !OtherPred) + return; + + if (UnavailablePred->succ_size() != 1) + // critical edge + return; + + if (!UnavailablePred->getFallThrough()) + // terminator might use or modify vtype + // TODO: handle common branch cases + return; + + if (!hasFixedResult(BlockInfo[OtherPred->getNumber()].Exit, ST)) + // If VL can be less than AVL, then reducing the frequency of execution + // is illegal.
+ return; + + LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " << UnavailablePred->getName() << "\n"); + //BlockInfo[OtherPred->getNumber()].Exit.dump(); + BlockInfo[UnavailablePred->getNumber()].Exit = BlockInfo[OtherPred->getNumber()].Exit; + BlockInfo[MBB.getNumber()].Pred = BlockInfo[OtherPred->getNumber()].Exit; } bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { @@ -1144,10 +1244,15 @@ BlockInfo.resize(MF.getNumBlockIDs()); bool HaveVectorOp = false; + DenseSet<const MachineBasicBlock *> BlocksWithVectorOp; // Phase 1 - determine how VL/VTYPE are affected by the each block. - for (const MachineBasicBlock &MBB : MF) - HaveVectorOp |= computeVLVTYPEChanges(MBB); + for (const MachineBasicBlock &MBB : MF) { + bool BlockHasVectorOp = computeVLVTYPEChanges(MBB); + if (BlockHasVectorOp) + BlocksWithVectorOp.insert(&MBB); + HaveVectorOp |= BlockHasVectorOp; + } // If we didn't find any instructions that need VSETVLI, we're done. if (HaveVectorOp) { @@ -1164,6 +1269,13 @@ computeIncomingVLVTYPE(MBB); } + for (MachineBasicBlock &MBB : MF) { + if (BlockInfo[MBB.getNumber()].Pred.isUnknown() && + BlocksWithVectorOp.count(&MBB)) { + doPRE(MBB); + } + } + // Phase 3 - add any vsetvli instructions needed in the block.
Use the // Phase 2 information to avoid adding vsetvlis before the first vector // instruction in the block if the VL/VTYPE is satisfied by its Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefix=CHECK-ASM @@ -516,6 +517,7 @@ ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: li a3, 32 +; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: li a4, 16 ; CHECK-ASM-NEXT: .LBB7_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 @@ -599,11 +601,11 @@ ; CHECK-ASM-NEXT: addi a0, a0, 32 ; CHECK-ASM-NEXT: addi a1, a1, 132 ; CHECK-ASM-NEXT: li a2, 1024 +; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: li a3, 16 ; CHECK-ASM-NEXT: .LBB8_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: addi a4, a1, -128 -; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: vlse32.v v8, (a4), a3 ; CHECK-ASM-NEXT: vlse32.v v9, (a1), a3 ; CHECK-ASM-NEXT: addi a4, a0, -32 @@ -727,10 +729,10 @@ ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 256 ; CHECK-ASM-NEXT: li a3, 64 +; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: li a4, 16 ; CHECK-ASM-NEXT: .LBB9_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: vlse32.v v8, (a1), a3 ; CHECK-ASM-NEXT: vlse32.v v9, (a0), a4 ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 @@ 
-840,10 +842,10 @@ ; CHECK-ASM: # %bb.0: ; CHECK-ASM-NEXT: addi a0, a0, 16 ; CHECK-ASM-NEXT: li a2, 1024 +; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-ASM-NEXT: li a3, 40 ; CHECK-ASM-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: addi a4, a1, 80 -; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-ASM-NEXT: vlse64.v v8, (a1), a3 ; CHECK-ASM-NEXT: vlse64.v v9, (a4), a3 ; CHECK-ASM-NEXT: addi a4, a0, -16 @@ -914,10 +916,10 @@ ; CHECK-ASM: # %bb.0: ; CHECK-ASM-NEXT: addi a1, a1, 16 ; CHECK-ASM-NEXT: li a2, 1024 +; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-ASM-NEXT: li a3, 40 ; CHECK-ASM-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: addi a4, a1, -16 -; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-ASM-NEXT: vle64.v v8, (a4) ; CHECK-ASM-NEXT: vle64.v v9, (a1) ; CHECK-ASM-NEXT: addi a4, a0, 80 Index: llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5,10 +5,10 @@ define void @sink_splat_mul(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_mul: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -41,10 +41,10 @@ define void @sink_splat_add(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_add: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB1_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vadd.vx v8, v8, a1 
; CHECK-NEXT: vse32.v v8, (a0) @@ -77,10 +77,10 @@ define void @sink_splat_sub(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB2_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -113,10 +113,10 @@ define void @sink_splat_rsub(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_rsub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB3_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -149,10 +149,10 @@ define void @sink_splat_and(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_and: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB4_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -185,10 +185,10 @@ define void @sink_splat_or(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_or: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB5_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -221,10 +221,10 @@ define void @sink_splat_xor(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_xor: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB6_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -901,10 +901,10 @@ define void @sink_splat_shl(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_shl: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB14_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsll.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -937,10 +937,10 @@ define void @sink_splat_lshr(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_lshr: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB15_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsrl.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -973,10 +973,10 @@ define void @sink_splat_ashr(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_ashr: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB16_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsra.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1285,10 +1285,10 @@ define void @sink_splat_fmul(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB20_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, 
e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1321,10 +1321,10 @@ define void @sink_splat_fdiv(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fdiv: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB21_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1357,10 +1357,10 @@ define void @sink_splat_frdiv(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB22_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1393,10 +1393,10 @@ define void @sink_splat_fadd(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB23_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1429,10 +1429,10 @@ define void @sink_splat_fsub(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB24_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1465,10 +1465,10 @@ define void @sink_splat_frsub(float* 
nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB25_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2047,10 +2047,10 @@ define void @sink_splat_fma(float* noalias nocapture %a, float* nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB32_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 @@ -2088,10 +2088,10 @@ define void @sink_splat_fma_commute(float* noalias nocapture %a, float* nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_commute: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB33_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 @@ -2414,10 +2414,10 @@ define void @sink_splat_udiv(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_udiv: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB38_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vdivu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2450,10 +2450,10 @@ define void @sink_splat_sdiv(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sdiv: ; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB39_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vdiv.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2486,10 +2486,10 @@ define void @sink_splat_urem(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_urem: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB40_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vremu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2522,10 +2522,10 @@ define void @sink_splat_srem(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_srem: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB41_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrem.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2929,9 +2929,9 @@ ; CHECK-LABEL: sink_splat_vp_mul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB46_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t @@ -2969,9 +2969,9 @@ ; CHECK-LABEL: sink_splat_vp_add: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB47_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, 
a2, e32, m1, ta, mu ; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t @@ -3051,9 +3051,9 @@ ; CHECK-LABEL: sink_splat_vp_sub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB49_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t @@ -3089,9 +3089,9 @@ ; CHECK-LABEL: sink_splat_vp_rsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB50_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t @@ -3129,9 +3129,9 @@ ; CHECK-LABEL: sink_splat_vp_shl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB51_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t @@ -3169,9 +3169,9 @@ ; CHECK-LABEL: sink_splat_vp_lshr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB52_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t @@ -3209,9 +3209,9 @@ ; CHECK-LABEL: sink_splat_vp_ashr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB53_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, 
e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t @@ -3249,9 +3249,9 @@ ; CHECK-LABEL: sink_splat_vp_fmul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB54_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t @@ -3289,9 +3289,9 @@ ; CHECK-LABEL: sink_splat_vp_fdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB55_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t @@ -3327,9 +3327,9 @@ ; CHECK-LABEL: sink_splat_vp_frdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB56_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t @@ -3367,9 +3367,9 @@ ; CHECK-LABEL: sink_splat_vp_fadd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB57_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t @@ -3407,9 +3407,9 @@ ; CHECK-LABEL: sink_splat_vp_fsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB58_1: # 
%vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t @@ -3447,9 +3447,9 @@ ; CHECK-LABEL: sink_splat_vp_frsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB59_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t @@ -3487,9 +3487,9 @@ ; CHECK-LABEL: sink_splat_vp_udiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB60_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t @@ -3527,9 +3527,9 @@ ; CHECK-LABEL: sink_splat_vp_sdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB61_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t @@ -3567,9 +3567,9 @@ ; CHECK-LABEL: sink_splat_vp_urem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB62_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t @@ -3607,9 +3607,9 @@ ; CHECK-LABEL: sink_splat_vp_srem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 
a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB63_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t @@ -3688,9 +3688,9 @@ ; CHECK-LABEL: sink_splat_vp_fma: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB65_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu @@ -3731,9 +3731,9 @@ ; CHECK-LABEL: sink_splat_vp_fma_commute: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB66_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -27,9 +27,11 @@ ; CHECK-NEXT: beqz a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: # %if.else ; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) @@ -58,9 +60,11 @@ ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vfadd.vv v9, v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmul.vv v8, v9, v8 ; CHECK-NEXT: ret ; 
CHECK-NEXT: .LBB1_2: # %if.else +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfsub.vv v9, v8, v9 ; CHECK-NEXT: vfmul.vv v8, v9, v8 ; CHECK-NEXT: ret @@ -90,11 +94,12 @@ ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu ; CHECK-NEXT: vfadd.vv v9, v8, v9 -; CHECK-NEXT: vfmul.vv v8, v9, v8 -; CHECK-NEXT: ret +; CHECK-NEXT: j .LBB2_3 ; CHECK-NEXT: .LBB2_2: # %if.else ; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu ; CHECK-NEXT: vfsub.vv v9, v8, v9 +; CHECK-NEXT: .LBB2_3: # %if.end +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmul.vv v8, v9, v8 ; CHECK-NEXT: ret entry: @@ -183,6 +188,7 @@ ; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu ; CHECK-NEXT: bnez a2, .LBB4_3 ; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfsub.vv v9, v8, v9 ; CHECK-NEXT: andi a0, a1, 2 ; CHECK-NEXT: beqz a0, .LBB4_4 @@ -191,6 +197,7 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB4_3: # %if.then ; CHECK-NEXT: vfadd.vv v9, v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: andi a0, a1, 2 ; CHECK-NEXT: bnez a0, .LBB4_2 ; CHECK-NEXT: .LBB4_4: # %if.else5 @@ -240,6 +247,7 @@ ; CHECK-NEXT: vsetvli a2, a0, e64, m1, ta, mu ; CHECK-NEXT: bnez a3, .LBB5_3 ; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: beqz a1, .LBB5_4 @@ -258,6 +266,7 @@ ; CHECK-NEXT: j .LBB5_5 ; CHECK-NEXT: .LBB5_3: # %if.then ; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: bnez a1, .LBB5_2 ; CHECK-NEXT: .LBB5_4: # %if.else5 @@ -337,6 +346,7 @@ ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu ; CHECK-NEXT: j .LBB6_3 ; CHECK-NEXT: .LBB6_2: # %if.else ; CHECK-NEXT: csrr a0, vlenb @@ -411,6 +421,7 @@ ; CHECK-NEXT: vl1r.v v8, (a0) # 
Unknown-size Folded Reload ; CHECK-NEXT: j .LBB7_3 ; CHECK-NEXT: .LBB7_2: # %if.else +; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu ; CHECK-NEXT: vfsub.vv v9, v8, v9 ; CHECK-NEXT: .LBB7_3: # %if.end ; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu @@ -449,11 +460,12 @@ ; CHECK-NEXT: beqz a3, .LBB8_2 ; CHECK-NEXT: .LBB8_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: slli a4, a3, 2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, mu +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, mu ; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vse32.v v16, (a2) ; CHECK-NEXT: sub a0, a0, a3 @@ -505,8 +517,8 @@ ; CHECK-NEXT: andi a0, a3, 1 ; CHECK-NEXT: beqz a0, .LBB9_2 ; CHECK-NEXT: # %bb.1: # %if -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vwcvt.x.x.v v8, v10 ; CHECK-NEXT: .LBB9_2: # %if.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -544,8 +556,8 @@ ; CHECK-NEXT: andi a0, a4, 1 ; CHECK-NEXT: beqz a0, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %if -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vwadd.wv v9, v9, v10 ; CHECK-NEXT: .LBB10_2: # %if.end ; CHECK-NEXT: andi a0, a5, 1 Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -102,23 +102,24 @@ define void @test6(i32* nocapture readonly %A, i32* nocapture %B, i64 %n) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a6, a2, e32, m1, ta, mu -; CHECK-NEXT: beqz a6, .LBB5_3 +; CHECK-NEXT: vsetvli a3, a2, e32, m1, ta, mu +; CHECK-NEXT: beqz a3, .LBB5_3 ; 
CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: .LBB5_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a3, a0, a5 -; CHECK-NEXT: vle32.v v8, (a3) +; CHECK-NEXT: add a6, a0, a5 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a6) ; CHECK-NEXT: vmsle.vi v9, v8, -3 ; CHECK-NEXT: vmsgt.vi v10, v8, 2 ; CHECK-NEXT: vmor.mm v0, v9, v10 -; CHECK-NEXT: add a3, a1, a5 -; CHECK-NEXT: vse32.v v8, (a3), v0.t -; CHECK-NEXT: add a4, a4, a6 -; CHECK-NEXT: vsetvli a6, a2, e32, m1, ta, mu -; CHECK-NEXT: bnez a6, .LBB5_2 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: vse32.v v8, (a5), v0.t +; CHECK-NEXT: add a4, a4, a3 +; CHECK-NEXT: vsetvli a3, a2, e32, m1, ta, mu +; CHECK-NEXT: bnez a3, .LBB5_2 ; CHECK-NEXT: .LBB5_3: # %for.cond.cleanup ; CHECK-NEXT: ret entry: