diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -101,6 +101,10 @@ assert(hasAVLImm()); return AVLImm; } + + unsigned getSEW() const { return SEW; } + RISCVII::VLMUL getVLMUL() const { return VLMul; } + bool hasZeroAVL() const { if (hasAVLImm()) return getAVLImm() == 0; @@ -458,6 +462,7 @@ void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); void doLocalPrepass(MachineBasicBlock &MBB); + void doPRE(MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -1278,6 +1283,98 @@ } } +/// Return true if the VL value configured must be equal to the requested one. +static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { + if (!Info.hasAVLImm()) + // TODO: Could allow VLMAX (e.g. X0), and possibly other registers + // by looking at the associated vreg def placement. + return false; + + if (RISCVII::LMUL_1 != Info.getVLMUL()) + // TODO: Generalize the code below to account for LMUL + return false; + + unsigned AVL = Info.getAVLImm(); + unsigned SEW = Info.getSEW(); + unsigned AVLInBits = AVL * SEW; + return ST.getRealMinVLen() >= AVLInBits; +} + +/// Perform simple partial redundancy elimination of the VSETVLI instructions +/// we're about to insert by looking for cases where we can PRE from the +/// beginning of one block to the end of one of its predecessors. Specifically, +/// this is geared to catch the common case of a fixed length vsetvl in a single +/// block loop when it could execute once in the preheader instead. +void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const RISCVSubtarget &ST = MF.getSubtarget(); + + if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) + return; + + MachineBasicBlock *UnavailablePred = nullptr; + VSETVLIInfo AvailableInfo; + for (MachineBasicBlock *P : MBB.predecessors()) { + const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit; + if (PredInfo.isUnknown()) { + if (UnavailablePred) + return; + UnavailablePred = P; + } else if (!AvailableInfo.isValid()) { + AvailableInfo = PredInfo; + } else if (AvailableInfo != PredInfo) { + return; + } + } + + // unreachable, single pred, or full redundancy. Note that FRE + // is handled by phase 3. + if (!UnavailablePred || !AvailableInfo.isValid()) + return; + + // critical edge - TODO: consider splitting? + if (UnavailablePred->succ_size() != 1) + return; + + // If VL can be less than AVL, then we can't reduce the frequency of exec. + if (!hasFixedResult(AvailableInfo, ST)) + return; + + // Does it actually let us remove an implicit transition in MBB? + bool Found = false; + for (auto &MI : MBB) { + if (isVectorConfigInstr(MI)) + return; + + const uint64_t TSFlags = MI.getDesc().TSFlags; + if (RISCVII::hasSEWOp(TSFlags)) { + if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI)) + return; + Found = true; + break; + } + } + if (!Found) + return; + + // Finally, update both data flow state and insert the actual vsetvli. + // Doing both keeps the code in sync with the dataflow results, which + // is critical for correctness of phase 3. + auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit; + LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " + << UnavailablePred->getName() << " with state " + << AvailableInfo << "\n"); + BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; + BlockInfo[MBB.getNumber()].Pred = AvailableInfo; + + // Note there's an implicit assumption here that terminators never use + // or modify VL or VTYPE. Also, fallthrough will return end(). + auto InsertPt = UnavailablePred->getFirstInstrTerminator(); + insertVSETVLI(*UnavailablePred, InsertPt, + UnavailablePred->findDebugLoc(InsertPt), + AvailableInfo, OldInfo); +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. const RISCVSubtarget &ST = MF.getSubtarget(); @@ -1332,6 +1429,10 @@ computeIncomingVLVTYPE(MBB); } + // Perform partial redundancy elimination of vsetvli transitions. + for (MachineBasicBlock &MBB : MF) + doPRE(MBB); + // Phase 3 - add any vsetvli instructions needed in the block. Use the // Phase 2 information to avoid adding vsetvlis before the first vector // instruction in the block if the VL/VTYPE is satisfied by its diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll @@ -590,10 +590,10 @@ ; CHECK-ASM-NEXT: addi a1, a1, 132 ; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: li a3, 16 +; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: .LBB8_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: addi a4, a1, -128 -; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: vlse32.v v8, (a4), a3 ; CHECK-ASM-NEXT: vlse32.v v9, (a1), a3 ; CHECK-ASM-NEXT: vle32.v v10, (a0) @@ -706,9 +706,9 @@ ; CHECK-ASM-NEXT: li a2, 256 ; CHECK-ASM-NEXT: li a3, 64 ; CHECK-ASM-NEXT: li a4, 16 +; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: .LBB9_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: vlse32.v v8, (a1), a3 ; CHECK-ASM-NEXT: vlse32.v v9, (a0), a4 ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 @@ -818,8 +818,8 @@ ; CHECK-ASM: # %bb.0: ; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: li a3, 40 -; CHECK-ASM-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-ASM-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vlse64.v v8, (a1), a3 ; CHECK-ASM-NEXT: addi a4, a1, 80 ; CHECK-ASM-NEXT: vlse64.v v9, (a4), a3 @@ -891,8 +891,8 @@ ; CHECK-ASM: # %bb.0: ; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: li a3, 40 -; CHECK-ASM-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-ASM-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vle64.v v8, (a1) ; CHECK-ASM-NEXT: addi a4, a1, 16 ; CHECK-ASM-NEXT: vle64.v v9, (a4) diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -6,9 +6,9 @@ ; CHECK-LABEL: sink_splat_mul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -42,9 +42,9 @@ ; CHECK-LABEL: sink_splat_add: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB1_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vadd.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -78,9 +78,9 @@ ; CHECK-LABEL: sink_splat_sub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB2_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -114,9 +114,9 @@ ; CHECK-LABEL: sink_splat_rsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB3_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -150,9 +150,9 @@ ; CHECK-LABEL: sink_splat_and: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB4_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -186,9 +186,9 @@ ; CHECK-LABEL: sink_splat_or: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB5_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -222,9 +222,9 @@ ; CHECK-LABEL: sink_splat_xor: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB6_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -902,9 +902,9 @@ ; CHECK-LABEL: sink_splat_shl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB14_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsll.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -938,9 +938,9 @@ ; CHECK-LABEL: sink_splat_lshr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB15_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsrl.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -974,9 +974,9 @@ ; CHECK-LABEL: sink_splat_ashr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB16_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsra.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1286,9 +1286,9 @@ ; CHECK-LABEL: sink_splat_fmul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB20_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1322,9 +1322,9 @@ ; CHECK-LABEL: sink_splat_fdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB21_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1358,9 +1358,9 @@ ; CHECK-LABEL: sink_splat_frdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB22_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1394,9 +1394,9 @@ ; CHECK-LABEL: sink_splat_fadd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB23_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1430,9 +1430,9 @@ ; CHECK-LABEL: sink_splat_fsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB24_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1466,9 +1466,9 @@ ; CHECK-LABEL: sink_splat_frsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB25_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2048,9 +2048,9 @@ ; CHECK-LABEL: sink_splat_fma: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB32_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 @@ -2089,9 +2089,9 @@ ; CHECK-LABEL: sink_splat_fma_commute: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB33_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 @@ -2415,9 +2415,9 @@ ; CHECK-LABEL: sink_splat_udiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB38_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vdivu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2451,9 +2451,9 @@ ; CHECK-LABEL: sink_splat_sdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB39_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vdiv.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2487,9 +2487,9 @@ ; CHECK-LABEL: sink_splat_urem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB40_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vremu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2523,9 +2523,9 @@ ; CHECK-LABEL: sink_splat_srem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB41_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrem.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2929,9 +2929,9 @@ ; CHECK-LABEL: sink_splat_vp_mul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB46_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t @@ -2969,9 +2969,9 @@ ; CHECK-LABEL: sink_splat_vp_add: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB47_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t @@ -3051,9 +3051,9 @@ ; CHECK-LABEL: sink_splat_vp_sub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB49_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t @@ -3089,9 +3089,9 @@ ; CHECK-LABEL: sink_splat_vp_rsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB50_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t @@ -3129,9 +3129,9 @@ ; CHECK-LABEL: sink_splat_vp_shl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB51_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t @@ -3169,9 +3169,9 @@ ; CHECK-LABEL: sink_splat_vp_lshr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB52_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t @@ -3209,9 +3209,9 @@ ; CHECK-LABEL: sink_splat_vp_ashr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB53_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t @@ -3249,9 +3249,9 @@ ; CHECK-LABEL: sink_splat_vp_fmul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB54_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t @@ -3289,9 +3289,9 @@ ; CHECK-LABEL: sink_splat_vp_fdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB55_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t @@ -3327,9 +3327,9 @@ ; CHECK-LABEL: sink_splat_vp_frdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB56_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t @@ -3367,9 +3367,9 @@ ; CHECK-LABEL: sink_splat_vp_fadd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB57_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t @@ -3407,9 +3407,9 @@ ; CHECK-LABEL: sink_splat_vp_fsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB58_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t @@ -3447,9 +3447,9 @@ ; CHECK-LABEL: sink_splat_vp_frsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB59_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t @@ -3487,9 +3487,9 @@ ; CHECK-LABEL: sink_splat_vp_udiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB60_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t @@ -3527,9 +3527,9 @@ ; CHECK-LABEL: sink_splat_vp_sdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB61_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t @@ -3567,9 +3567,9 @@ ; CHECK-LABEL: sink_splat_vp_urem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB62_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t @@ -3607,9 +3607,9 @@ ; CHECK-LABEL: sink_splat_vp_srem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB63_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t @@ -3688,9 +3688,9 @@ ; CHECK-LABEL: sink_splat_vp_fma: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB65_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu @@ -3731,9 +3731,9 @@ ; CHECK-LABEL: sink_splat_vp_fma_commute: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB66_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu