Index: lib/Target/AMDGPU/GCNRegPressure.cpp =================================================================== --- lib/Target/AMDGPU/GCNRegPressure.cpp +++ lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "GCNRegPressure.h" +#include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -264,6 +265,61 @@ MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); } +struct MachineInstrRegs { + SmallVector Defs; + SmallVector Uses; + +private: + static RegisterMaskPair& insert(SmallVectorImpl &A, + unsigned Reg) { + auto I = std::find_if(A.begin(), A.end(), [Reg](const RegisterMaskPair &RM) { + return RM.RegUnit == Reg; + }); + if (I != A.end()) + return *I; + A.push_back(RegisterMaskPair(Reg, LaneBitmask::getNone())); + return A.back(); + } + +public: + static MachineInstrRegs collectVirtualRegs(const MachineInstr &MI, + const LiveIntervals &LIS, + const MachineRegisterInfo &MRI) { + MachineInstrRegs Res; + for (const auto &MO : MI.operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + if (MO.isDef() && !MO.isDead()) { + auto &LaneMask = insert(Res.Defs, MO.getReg()).LaneMask; + LaneMask |= MO.getSubReg() == 0 ? + MRI.getMaxLaneMaskForVReg(MO.getReg()) : + MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg()); + } else + if (MO.isUse() && MO.readsReg()) { + auto &LaneMask = insert(Res.Uses, MO.getReg()).LaneMask; + auto const MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg()); + if (MaxMask.getAsInteger() == 1) // cannot have subregs + LaneMask = MaxMask; + else if (auto SubReg = MO.getSubReg()) + LaneMask |= MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg); + else + LaneMask = LaneBitmask::getAll(); // check actual usage mask later (once) + } + } + // adjust correct usage mask using LIS + for (auto &U : Res.Uses) { + if (!U.LaneMask.all()) continue; + // For a tentative schedule LIS isn't updated yet but livemask should remain + // the same on any schedule. Subreg defs can be reordered but they all must + // dominate uses anyway. + auto SI = LIS.getInstructionIndex(MI).getBaseIndex(); + U.LaneMask = getLiveLaneMask(U.RegUnit, SI, LIS, MRI); + } + return Res; + } +}; + void GCNUpwardRPTracker::recede(const MachineInstr &MI) { assert(MRI && "call reset first"); @@ -272,34 +328,30 @@ if (MI.isDebugValue()) return; - // process all defs first to ensure early clobbers are handled correctly - // iterating over operands() to catch implicit defs - for (const auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; + auto const Regs = MachineInstrRegs::collectVirtualRegs(MI, LIS, *MRI); - auto Reg = MO.getReg(); - auto &LiveMask = LiveRegs[Reg]; - auto PrevMask = LiveMask; - LiveMask &= ~getDefRegMask(MO); - CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); + // calc pressure at the MI (defs + uses) + auto AtMIPressure = CurPressure; + for (const auto &U : Regs.Uses) { + auto LiveMask = LiveRegs[U.RegUnit]; + AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI); } + // update max pressure + MaxPressure = max(AtMIPressure, MaxPressure); - // then all uses - for (const auto &MO : MI.uses()) { - if (!MO.isReg() || !MO.readsReg() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - auto Reg = MO.getReg(); - auto &LiveMask = LiveRegs[Reg]; + for (const auto &D : Regs.Defs) { + auto &LiveMask = LiveRegs[D.RegUnit]; auto PrevMask = LiveMask; - LiveMask |= getUsedRegMask(MO); - CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); + LiveMask &= ~D.LaneMask; + CurPressure.inc(D.RegUnit, PrevMask, LiveMask, *MRI); } - - MaxPressure = max(MaxPressure, CurPressure); + for (const auto &U : Regs.Uses) { + auto &LiveMask = LiveRegs[U.RegUnit]; + auto PrevMask = LiveMask; + LiveMask |= U.LaneMask; + CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI); + } + assert(CurPressure == getRegPressure(*MRI, LiveRegs)); } bool GCNDownwardRPTracker::reset(const MachineInstr &MI,