diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -237,14 +237,12 @@
   bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const {
     assert(isValid() && InstrInfo.isValid() &&
            "Can't compare invalid VSETVLIInfos");
-    assert(!InstrInfo.SEWLMULRatioOnly &&
-           "Expected a valid VTYPE for instruction!");
     // Nothing is compatible with Unknown.
     if (isUnknown() || InstrInfo.isUnknown())
       return false;
 
     // If only our VLMAX ratio is valid, then this isn't compatible.
-    if (SEWLMULRatioOnly)
+    if (SEWLMULRatioOnly || InstrInfo.SEWLMULRatioOnly)
       return false;
 
     // If the instruction doesn't need an AVLReg and the SEW matches, consider
@@ -446,6 +444,16 @@
   BlockData() = default;
 };
 
+// The different kinds of VSETVLI insertion we support.
+enum VSETVLIInsertionKind {
+  // No VSETVLI is required (e.g., compatible with the previous state).
+  None,
+  // A new VSETVLI must be inserted.
+  Required,
+  // A previous VSETVLI may safely be mutated.
+  MutatePrevious,
+};
+
 class RISCVInsertVSETVLI : public MachineFunctionPass {
   const TargetInstrInfo *TII;
   MachineRegisterInfo *MRI;
@@ -470,6 +478,10 @@
 
 private:
   bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
+  VSETVLIInsertionKind needVSETVLI(const MachineInstr &MI,
+                                   const VSETVLIInfo &Require,
+                                   const VSETVLIInfo &CurInfo,
+                                   const MachineInstr *PrevVSETVLIMI = nullptr);
   bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB);
   void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
@@ -935,14 +947,51 @@
   return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
 }
 
+VSETVLIInsertionKind RISCVInsertVSETVLI::needVSETVLI(
+    const MachineInstr &MI, const VSETVLIInfo &Require,
+    const VSETVLIInfo &CurInfo, const MachineInstr *PrevVSETVLIMI) {
+  if (!needVSETVLI(Require, CurInfo))
+    return VSETVLIInsertionKind::None;
+
+  // If this is a unit-stride or strided load/store, we may be able to use
+  // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+  if (canSkipVSETVLIForLoadStore(MI, Require, CurInfo))
+    return VSETVLIInsertionKind::None;
+
+  // If the previous VL/VTYPE was set by a VSETVLI whose result is unused,
+  // we may be able to merge it with the current VL/VTYPE instead.
+  if (PrevVSETVLIMI) {
+    bool HasSameAVL =
+        CurInfo.hasSameAVL(Require) ||
+        (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
+         Require.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
+    // If the two VSETVLIs have the same AVL and the same VLMAX, we can
+    // merge them by rewriting the previous one's vtype.
+    if (HasSameAVL && CurInfo.getSEWLMULRatio() == Require.getSEWLMULRatio())
+      return VSETVLIInsertionKind::MutatePrevious;
+
+    if (isScalarMoveInstr(MI) &&
+        ((CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) ||
+         (CurInfo.hasZeroAVL() && Require.hasZeroAVL())) &&
+        Require.hasSameVLMAX(CurInfo))
+      return VSETVLIInsertionKind::MutatePrevious;
+  }
+
+  return VSETVLIInsertionKind::Required;
+}
+
 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
   bool HadVectorOp = false;
 
+  // Only set if the current VSETVLIInfo is from an explicit VSET(I)VLI.
+  const MachineInstr *PrevVSETVLIMI = nullptr;
+
   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
   for (const MachineInstr &MI : MBB) {
     // If this is an explicit VSETVLI or VSETIVLI, update our state.
     if (isVectorConfigInstr(MI)) {
       HadVectorOp = true;
+      PrevVSETVLIMI = &MI;
       BBInfo.Change = getInfoForVSETVLI(MI);
       continue;
     }
 
@@ -958,15 +1007,15 @@
       } else {
         // If this instruction isn't compatible with the previous VL/VTYPE
         // we need to insert a VSETVLI.
-        // If this is a unit-stride or strided load/store, we may be able to use
-        // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
         // NOTE: We only do this if the vtype we're comparing against was
        // created in this block. We need the first and third phase to treat
         // the store the same way.
-        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
-            needVSETVLI(NewInfo, BBInfo.Change))
+        if (needVSETVLI(MI, NewInfo, BBInfo.Change, PrevVSETVLIMI) !=
+            VSETVLIInsertionKind::None) {
           BBInfo.Change = NewInfo;
+        }
       }
+      PrevVSETVLIMI = nullptr;
     }
 
     // If this is something that updates VL/VTYPE that we don't know about, set
@@ -974,6 +1023,7 @@
     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
       BBInfo.Change = VSETVLIInfo::getUnknown();
+      PrevVSETVLIMI = nullptr;
     }
   }
 
@@ -1003,15 +1053,22 @@
 
   BBInfo.Pred = InInfo;
 
-  VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);
+  // Cache the exit state before recomputation.
+  // FIXME: We can drop 'Change' and just use 'Exit'.
+  // FIXME: We shouldn't have to set 'Change' as an input.
+  auto CachedExit = BBInfo.Exit;
+
+  // Now that we've computed the info for predecessors, recompute the VTYPE
+  // changes on this block. The predecessors may have changed the incoming
+  // vtype and we must stay in sync with phase 3.
+  BBInfo.Change = InInfo;
+  computeVLVTYPEChanges(MBB);
 
   // If the new exit value matches the old exit value, we don't need to revisit
   // any blocks.
-  if (BBInfo.Exit == TmpStatus)
+  if (CachedExit == BBInfo.Exit)
     return;
 
-  BBInfo.Exit = TmpStatus;
-
   // Add the successors to the work list so we can propagate the changed exit
   // status.
   for (MachineBasicBlock *S : MBB.successors())
@@ -1070,7 +1127,7 @@
 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
   VSETVLIInfo CurInfo;
 
-  // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
+  // Only set if the current VSETVLIInfo is from an explicit VSET(I)VLI.
   MachineInstr *PrevVSETVLIMI = nullptr;
 
   for (MachineInstr &MI : MBB) {
@@ -1120,38 +1177,15 @@
       } else {
         // If this instruction isn't compatible with the previous VL/VTYPE
         // we need to insert a VSETVLI.
-        // If this is a unit-stride or strided load/store, we may be able to use
-        // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
         // NOTE: We can't use predecessor information for the store. We must
         // treat it the same as the first phase so that we produce the correct
-        // vl/vtype for succesor blocks.
-        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
-            needVSETVLI(NewInfo, CurInfo)) {
-          // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it
-          // with current VL/VTYPE.
-          bool NeedInsertVSETVLI = true;
-          if (PrevVSETVLIMI) {
-            bool HasSameAVL =
-                CurInfo.hasSameAVL(NewInfo) ||
-                (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
-                 NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
-            // If these two VSETVLI have the same AVL and the same VLMAX,
-            // we could merge these two VSETVLI.
-            if (HasSameAVL &&
-                CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
-              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-              NeedInsertVSETVLI = false;
-            }
-            if (isScalarMoveInstr(MI) &&
-                ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
-                 (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
-                NewInfo.hasSameVLMAX(CurInfo)) {
-              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-              NeedInsertVSETVLI = false;
-            }
-          }
-          if (NeedInsertVSETVLI)
+        // vl/vtype for successor blocks.
+        auto Kind = needVSETVLI(MI, NewInfo, CurInfo, PrevVSETVLIMI);
+        if (Kind != VSETVLIInsertionKind::None) {
+          if (Kind == VSETVLIInsertionKind::Required)
             insertVSETVLI(MBB, MI, NewInfo, CurInfo);
+          else if (Kind == VSETVLIInsertionKind::MutatePrevious)
+            PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
           CurInfo = NewInfo;
         }
       }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -744,9 +744,6 @@
 ...
 ---
-# FIXME: This test shows incorrect VSETVLI insertion. The VLUXEI64 needs
-# configuration for SEW=8 but it instead inherits a SEW=64 from the entry
-# block.
 name: vsetvli_vluxei64_regression
 tracksRegLiveness: true
 body: |
@@ -779,6 +776,7 @@
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $v0 = COPY %mask
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   early-clobber %t0:vrnov0 = PseudoVLUXEI64_V_M1_MF8_MASK %t5, killed %inaddr, %idxs, $v0, -1, 3 /* e8 */, 1, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   %ldval:vr = COPY %t0
   ; CHECK-NEXT:   PseudoBR %bb.3
@@ -786,6 +784,7 @@
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   %stval:vr = PHI %t4, %bb.1, %ldval, %bb.2
   ; CHECK-NEXT:   $v0 = COPY %mask
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoVSOXEI64_V_M1_MF8_MASK killed %stval, killed %b, %idxs, $v0, -1, 3 /* e8 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   PseudoRET
   bb.0:
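Reviewer note: the shape of this change is that phases 1 and 3 now answer "do we need a vsetvli here, and of what kind?" through a single tri-state query, needVSETVLI(MI, Require, CurInfo, PrevVSETVLIMI), instead of phase 3 carrying private merge logic that phase 1 never modeled; together with the recomputation added to phase 2, this is what removes the FIXME above, so the masked VLUXEI64/VSOXEI64 now get an e8 PseudoVSETVLIX0 instead of inheriting SEW=64 from the entry block. Below is a minimal standalone sketch of that pattern, not LLVM code: Config, classify, analyze, and emit are invented stand-ins for VSETVLIInfo, needVSETVLI, computeVLVTYPEChanges, and emitVSETVLIs, and the load/store EEW and scalar-move special cases are omitted.

// Standalone sketch (not LLVM code) of the tri-state "need a config change?"
// query this patch introduces. All names below are illustrative.
#include <cassert>
#include <cstdio>

enum class InsertionKind {
  None,           // current config already satisfies the requirement
  Required,       // a new config instruction must be inserted
  MutatePrevious, // an unused earlier config can be rewritten in place
};

struct Config {
  int AVL;   // requested vector length
  int SEW;   // element width (part of vtype)
  int Ratio; // SEW/LMUL ratio; fixes VLMAX for a given VLEN
  bool operator==(const Config &O) const {
    return AVL == O.AVL && SEW == O.SEW && Ratio == O.Ratio;
  }
};

// The single decision point shared by both phases below.
InsertionKind classify(const Config &Require, const Config &Cur,
                       bool PrevConfigMutable) {
  if (Cur == Require)
    return InsertionKind::None;
  // Same AVL and same SEW/LMUL ratio means VL itself is unchanged, so an
  // earlier config instruction whose result is unused can simply have its
  // vtype immediate rewritten instead of inserting a second instruction.
  if (PrevConfigMutable && Cur.AVL == Require.AVL && Cur.Ratio == Require.Ratio)
    return InsertionKind::MutatePrevious;
  return InsertionKind::Required;
}

// Phase-1 analogue: tracks abstract state only, never rewrites anything.
Config analyze(Config Cur, const Config &Require, bool PrevMutable) {
  if (classify(Require, Cur, PrevMutable) != InsertionKind::None)
    Cur = Require; // the state changes whether we insert or mutate
  return Cur;
}

// Phase-3 analogue: asks the same query, but acts on the answer.
Config emit(Config Cur, const Config &Require, bool PrevMutable) {
  switch (classify(Require, Cur, PrevMutable)) {
  case InsertionKind::None:
    return Cur;
  case InsertionKind::Required:
    std::printf("insert vsetvli: AVL=%d SEW=e%d ratio=%d\n", Require.AVL,
                Require.SEW, Require.Ratio);
    return Require;
  case InsertionKind::MutatePrevious:
    std::printf("rewrite previous vsetvli's vtype: SEW=e%d\n", Require.SEW);
    return Require;
  }
  return Cur; // unreachable; keeps -Wreturn-type quiet
}

int main() {
  // e64/m8 and e8/m1 share ratio 8, so with a mutable previous config the
  // query answers MutatePrevious; both phases land in the same final state.
  Config Cur{16, 64, 8};
  Config Req{16, 8, 8};
  assert(analyze(Cur, Req, true) == emit(Cur, Req, true));
  assert(analyze(Cur, Req, false) == emit(Cur, Req, false));
  return 0;
}

The property mirrored from the patch is that the state transition is identical for Required and MutatePrevious, so a phase that only tracks state can share classify() verbatim with the phase that rewrites instructions, and their per-block exit states cannot diverge.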