Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1282,36 +1282,12 @@ } } -/// Return true if the VL value configured must be equal to the requested one. -static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { - if (!Info.hasAVLImm()) - // VLMAX is always the same value. - // TODO: Could extend to other registers by looking at the associated vreg - // def placement. - return RISCV::X0 == Info.getAVLReg(); - - unsigned AVL = Info.getAVLImm(); - unsigned SEW = Info.getSEW(); - unsigned AVLInBits = AVL * SEW; - - unsigned LMul; - bool Fractional; - std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL()); - - if (Fractional) - return ST.getRealMinVLen() / LMul >= AVLInBits; - return ST.getRealMinVLen() * LMul >= AVLInBits; -} - /// Perform simple partial redundancy elimination of the VSETVLI instructions /// we're about to insert by looking for cases where we can PRE from the /// beginning of one block to the end of one of its predecessors. Specifically, /// this is geared to catch the common case of a fixed length vsetvl in a single /// block loop when it could execute once in the preheader instead. void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { - const MachineFunction &MF = *MBB.getParent(); - const RISCVSubtarget &ST = MF.getSubtarget(); - if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) return; @@ -1339,9 +1315,21 @@ if (UnavailablePred->succ_size() != 1) return; - // If VL can be less than AVL, then we can't reduce the frequency of exec. - if (!hasFixedResult(AvailableInfo, ST)) - return; + // Note there's an implicit assumption here that terminators never use + // or modify VL or VTYPE. Also, fallthrough will return end(). + auto InsertPt = UnavailablePred->getFirstInstrTerminator(); + + // Is the AVL value available in the predecessor? 
+ if (!AvailableInfo.hasAVLImm() && RISCV::X0 != AvailableInfo.getAVLReg()) { + auto *DefMI = MRI->getVRegDef(AvailableInfo.getAVLReg()); + // Do a very cheap dominance check specialized for the case where the + // register is defined in the block we're moving into. This is common + // for e.g. values hoisted into preheaders of loops. We could do + // a general dominance check here instead if we thought the compile + // time impact was worthwhile. + if (DefMI->getParent()->instr_end() != InsertPt) + return; + } // Does it actually let us remove an implicit transition in MBB? bool Found = false; @@ -1370,9 +1358,6 @@ BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; BlockInfo[MBB.getNumber()].Pred = AvailableInfo; - // Note there's an implicit assumption here that terminators never use - // or modify VL or VTYPE. Also, fallthrough will return end(). - auto InsertPt = UnavailablePred->getFirstInstrTerminator(); insertVSETVLI(*UnavailablePred, InsertPt, UnavailablePred->findDebugLoc(InsertPt), AvailableInfo, OldInfo); Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -12,20 +12,20 @@ ; CHECK-LABEL: gather: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: li a4, 5 -; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: li a4, 32 +; CHECK-NEXT: li a3, 5 +; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, mu +; CHECK-NEXT: li a4, 1024 ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu -; CHECK-NEXT: vlse8.v v8, (a1), a4 -; CHECK-NEXT: add a6, a0, a2 -; CHECK-NEXT: vle8.v v9, (a6) +; CHECK-NEXT: vlse8.v v8, (a1), a3 +; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: vle8.v v9, (a5) ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: 
vse8.v v8, (a6) +; CHECK-NEXT: vse8.v v8, (a5) ; CHECK-NEXT: addi a2, a2, 32 ; CHECK-NEXT: addi a1, a1, 160 -; CHECK-NEXT: bne a2, a5, .LBB0_1 +; CHECK-NEXT: bne a2, a4, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -108,20 +108,20 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: addi a1, a1, 155 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: li a4, -5 -; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: li a4, 32 +; CHECK-NEXT: li a3, -5 +; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, mu +; CHECK-NEXT: li a4, 1024 ; CHECK-NEXT: .LBB2_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu -; CHECK-NEXT: vlse8.v v8, (a1), a4 -; CHECK-NEXT: add a6, a0, a2 -; CHECK-NEXT: vle8.v v9, (a6) +; CHECK-NEXT: vlse8.v v8, (a1), a3 +; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: vle8.v v9, (a5) ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vse8.v v8, (a6) +; CHECK-NEXT: vse8.v v8, (a5) ; CHECK-NEXT: addi a2, a2, 32 ; CHECK-NEXT: addi a1, a1, 160 -; CHECK-NEXT: bne a2, a5, .LBB2_1 +; CHECK-NEXT: bne a2, a4, .LBB2_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -154,18 +154,18 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu +; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: .LBB3_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-NEXT: vlse8.v v8, (a1), zero -; CHECK-NEXT: add a5, a0, a2 -; CHECK-NEXT: vle8.v v9, (a5) +; CHECK-NEXT: add a4, a0, a2 +; CHECK-NEXT: vle8.v v9, (a4) ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vse8.v v8, (a5) +; CHECK-NEXT: vse8.v v8, (a4) ; CHECK-NEXT: addi a2, a2, 32 ; CHECK-NEXT: addi a1, a1, 160 -; CHECK-NEXT: bne a2, a4, .LBB3_1 +; CHECK-NEXT: bne a2, a3, .LBB3_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -201,20 +201,20 @@ ; CHECK-LABEL: 
scatter: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: li a4, 5 -; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: li a4, 32 +; CHECK-NEXT: li a3, 5 +; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, mu +; CHECK-NEXT: li a4, 1024 ; CHECK-NEXT: .LBB4_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add a6, a1, a2 -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu -; CHECK-NEXT: vle8.v v8, (a6) -; CHECK-NEXT: vlse8.v v9, (a0), a4 +; CHECK-NEXT: add a5, a1, a2 +; CHECK-NEXT: vle8.v v8, (a5) +; CHECK-NEXT: vlse8.v v9, (a0), a3 ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsse8.v v8, (a0), a4 +; CHECK-NEXT: vsse8.v v8, (a0), a3 ; CHECK-NEXT: addi a2, a2, 32 ; CHECK-NEXT: addi a0, a0, 160 -; CHECK-NEXT: bne a2, a5, .LBB4_1 +; CHECK-NEXT: bne a2, a4, .LBB4_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -673,19 +673,19 @@ ; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: add a2, a0, a2 ; CHECK-NEXT: add a6, a1, a6 -; CHECK-NEXT: li a7, 32 -; CHECK-NEXT: li t0, 5 -; CHECK-NEXT: mv t1, a5 +; CHECK-NEXT: li t0, 32 +; CHECK-NEXT: li a7, 5 +; CHECK-NEXT: vsetvli zero, t0, e8, m1, ta, mu +; CHECK-NEXT: mv t0, a5 ; CHECK-NEXT: .LBB12_3: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a7, e8, m1, ta, mu -; CHECK-NEXT: vlse8.v v8, (a6), t0 +; CHECK-NEXT: vlse8.v v8, (a6), a7 ; CHECK-NEXT: vle8.v v9, (a2) ; CHECK-NEXT: vadd.vv v8, v9, v8 ; CHECK-NEXT: vse8.v v8, (a2) -; CHECK-NEXT: addi t1, t1, -32 +; CHECK-NEXT: addi t0, t0, -32 ; CHECK-NEXT: addi a2, a2, 32 ; CHECK-NEXT: addi a6, a6, 160 -; CHECK-NEXT: bnez t1, .LBB12_3 +; CHECK-NEXT: bnez t0, .LBB12_3 ; CHECK-NEXT: # %bb.4: ; CHECK-NEXT: beq a4, a5, .LBB12_7 ; CHECK-NEXT: .LBB12_5: Index: llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -4028,9 +4028,9 
@@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: .LBB74_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -4065,9 +4065,9 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: .LBB75_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vadd.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -4102,9 +4102,9 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: .LBB76_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -4139,9 +4139,9 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: .LBB77_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -4176,9 +4176,9 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: .LBB78_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -4213,9 +4213,9 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 +; 
CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: .LBB79_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -4250,9 +4250,9 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: .LBB80_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0)