diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1188,27 +1188,40 @@ if (!hasFixedResult(AvailableInfo, ST)) return; - // Does it actually let us remove an implicit transition in MBB? - bool Found = false; - for (auto &MI : MBB) { - if (isVectorConfigInstr(MI)) - return; - - const uint64_t TSFlags = MI.getDesc().TSFlags; - if (RISCVII::hasSEWOp(TSFlags)) { - if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI)) - return; - Found = true; + // Model the effect of changing the input state of the block MBB to + // AvailableInfo. We're looking for two issues here: one of legality, + // one of profitability. + // 1) If the block doesn't use some of the fields from VL or VTYPE, we + // may hit the end of the block with a different end state. We + // cannot make this change without reflowing later blocks as well. + // 2) If we don't actually remove a transition, inserting a vsetvli + // into the predecessor block would be correct, but unprofitable. + VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred; + VSETVLIInfo CurInfo = AvailableInfo; + int TransitionsRemoved = 0; + for (const MachineInstr &MI : MBB) { + const VSETVLIInfo LastInfo = CurInfo; + const VSETVLIInfo LastOldInfo = OldInfo; + transferBefore(CurInfo, MI); + transferBefore(OldInfo, MI); + if (CurInfo == LastInfo) + TransitionsRemoved++; + if (LastOldInfo == OldInfo) + TransitionsRemoved--; + transferAfter(CurInfo, MI); + transferAfter(OldInfo, MI); + if (CurInfo == OldInfo) + // Convergence. All transitions after this must match by construction. break; - } } - if (!Found) + if (CurInfo != OldInfo || TransitionsRemoved <= 0) + // Issues 1 and 2 above return; // Finally, update both data flow state and insert the actual vsetvli. // Doing both keeps the code in sync with the dataflow results, which // is critical for correctness of phase 3. 
- auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit; + auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit; LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " << UnavailablePred->getName() << " with state " << AvailableInfo << "\n"); @@ -1220,7 +1233,7 @@ auto InsertPt = UnavailablePred->getFirstInstrTerminator(); insertVSETVLI(*UnavailablePred, InsertPt, UnavailablePred->findDebugLoc(InsertPt), - AvailableInfo, OldInfo); + AvailableInfo, OldExit); } static void doUnion(DemandedFields &A, DemandedFields B) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -954,16 +954,15 @@ ret %e } -; This case demonstrates a PRE oppurtunity where the first instruction -; in the block doesn't require a state transition. Essentially, we need -; to FRE the transition to the start of the block, and *then* PRE it. +; This case demonstrates a PRE case where the first instruction in the block +; doesn't require a state transition. define void @pre_over_vle(ptr %A) { ; CHECK-LABEL: pre_over_vle: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 100 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: .LBB22_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf4 v9, v8 ; CHECK-NEXT: vse32.v v9, (a0)