Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1188,27 +1188,40 @@ if (!hasFixedResult(AvailableInfo, ST)) return; - // Does it actually let us remove an implicit transition in MBB? - bool Found = false; - for (auto &MI : MBB) { - if (isVectorConfigInstr(MI)) - return; - - const uint64_t TSFlags = MI.getDesc().TSFlags; - if (RISCVII::hasSEWOp(TSFlags)) { - if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI)) - return; - Found = true; + // Model the effect of changing the input state of the block MBB to + // AvailableInfo. We're looking for two issues here; one legality, + // one profitability. + // 1) If the block doesn't use some of the fields from VL or VTYPE, we + // may hit the end of the block with a different end state. We can + // not make this change without reflowing later blocks as well. + // 2) If we don't actually remove a transition, inserting a vsetvli + // into the predecessor block would be correct, but unprofitable. + VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred; + VSETVLIInfo CurInfo = AvailableInfo; + int64_t TransitionsRemoved = 0; + for (const MachineInstr &MI : MBB) { + const VSETVLIInfo LastInfo = CurInfo; + const VSETVLIInfo LastOldInfo = OldInfo; + transferBefore(CurInfo, MI); + transferBefore(OldInfo, MI); + if (CurInfo == LastInfo) + TransitionsRemoved++; + if (LastOldInfo == OldInfo) + TransitionsRemoved--; + transferAfter(CurInfo, MI); + transferAfter(OldInfo, MI); + if (CurInfo == OldInfo) + // Convergence. All transitions after this must match by construction. break; - } } - if (!Found) + if (CurInfo != OldInfo || TransitionsRemoved <= 0) + // Issues 1 and 2 above return; // Finally, update both data flow state and insert the actual vsetvli. 
// Doing both keeps the code in sync with the dataflow results, which // is critical for correctness of phase 3. - auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit; + auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit; LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " << UnavailablePred->getName() << " with state " << AvailableInfo << "\n"); @@ -1220,7 +1233,7 @@ auto InsertPt = UnavailablePred->getFirstInstrTerminator(); insertVSETVLI(*UnavailablePred, InsertPt, UnavailablePred->findDebugLoc(InsertPt), - AvailableInfo, OldInfo); + AvailableInfo, OldExit); } static void doUnion(DemandedFields &A, DemandedFields B) { Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -964,10 +964,10 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: li a2, 800 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: .LBB22_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add a3, a0, a1 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a3) ; CHECK-NEXT: vsext.vf4 v9, v8 ; CHECK-NEXT: addi a1, a1, 8