Index: lib/Target/AMDGPU/GCNRegPressure.h =================================================================== --- lib/Target/AMDGPU/GCNRegPressure.h +++ lib/Target/AMDGPU/GCNRegPressure.h @@ -92,16 +92,21 @@ typedef DenseMap LiveRegSet; protected: + const LiveIntervals &LIS; LiveRegSet LiveRegs; GCNRegPressure CurPressure, MaxPressure; const MachineInstr *LastTrackedMI = nullptr; mutable const MachineRegisterInfo *MRI = nullptr; - GCNRPTracker() {} + GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {} + LaneBitmask getDefRegMask(const MachineOperand &MO) const; + LaneBitmask getUsedRegMask(const MachineOperand &MO) const; public: // live regs for the current state const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; } const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; } + void clearMaxPressure() { MaxPressure.clear(); } + // returns MaxPressure, resetting it decltype(MaxPressure) moveMaxPressure() { auto Res = MaxPressure; @@ -114,14 +119,11 @@ }; class GCNUpwardRPTracker : public GCNRPTracker { - const LiveIntervals &LIS; - LaneBitmask getDefRegMask(const MachineOperand &MO) const; - LaneBitmask getUsedRegMask(const MachineOperand &MO) const; public: - GCNUpwardRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {} + GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} // reset tracker to the point just below MI // filling live regs upon this point using LIS - void reset(const MachineInstr &MI); + void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); // move to the state just above the MI void recede(const MachineInstr &MI); @@ -131,6 +133,26 @@ bool isValid() const; }; +class GCNDownwardRPTracker : public GCNRPTracker { + // move to the state at the MI, advanceBefore(MI) has to be called first + void advanceTo(const MachineInstr &MI); + +public: + GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} + // reset tracker to the point at MI + // filling live regs upon this point using LIS + void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); + + // move to the state right before the MI + void advanceBefore(const MachineInstr &MI); + + // move to the state at the MI + void advance(const MachineInstr &MI) { + advanceBefore(MI); + advanceTo(MI); + } +}; + LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, Index: lib/Target/AMDGPU/GCNRegPressure.cpp =================================================================== --- lib/Target/AMDGPU/GCNRegPressure.cpp +++ lib/Target/AMDGPU/GCNRegPressure.cpp @@ -223,13 +223,7 @@ return LiveRegs; } -void GCNUpwardRPTracker::reset(const MachineInstr &MI) { - MRI = &MI.getParent()->getParent()->getRegInfo(); - LiveRegs = getLiveRegsAfter(MI, LIS); - MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); -} - -LaneBitmask GCNUpwardRPTracker::getDefRegMask(const MachineOperand &MO) const { +LaneBitmask GCNRPTracker::getDefRegMask(const MachineOperand &MO) const { assert(MO.isDef() && MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())); @@ -241,7 +235,7 @@ MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg()); } -LaneBitmask GCNUpwardRPTracker::getUsedRegMask(const MachineOperand &MO) const { +LaneBitmask GCNRPTracker::getUsedRegMask(const MachineOperand &MO) const { assert(MO.isUse() && MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())); @@ -259,6 +253,13 @@ return getLiveLaneMask(MO.getReg(), SI, LIS, *MRI); } +void GCNUpwardRPTracker::reset(const MachineInstr &MI, + const LiveRegSet *LiveRegsCopy) { + MRI = &MI.getParent()->getParent()->getRegInfo(); + LiveRegs = LiveRegsCopy ? *LiveRegsCopy : getLiveRegsAfter(MI, LIS); + MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); +} + void GCNUpwardRPTracker::recede(const MachineInstr &MI) { assert(MRI && "call reset first"); @@ -297,6 +298,67 @@ MaxPressure = max(MaxPressure, CurPressure); } +void GCNDownwardRPTracker::reset(const MachineInstr &MI, + const LiveRegSet *LiveRegsCopy) { + MRI = &MI.getParent()->getParent()->getRegInfo(); + MachineBasicBlock::const_iterator I = MI.getIterator(); + I = skipDebugInstructionsForward(I, I->getParent()->end()); + LiveRegs = LiveRegsCopy ? *LiveRegsCopy : getLiveRegsBefore(*I, LIS); + MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); +} + +void GCNDownwardRPTracker::advanceBefore(const MachineInstr &MI) { + assert(MRI && "call reset first"); + + if (MI.isDebugValue()) + return; + + SlotIndex SI = LIS.getInstructionIndex(MI).getBaseIndex(); + assert (SI.isValid()); + + // Remove dead registers or mask bits. + for (auto &It : LiveRegs) { + if (It.second.none()) + continue; + const LiveInterval &LI = LIS.getInterval(It.first); + if (LI.hasSubRanges()) { + for (const auto &S : LI.subranges()) { + if (!S.liveAt(SI)) { + auto PrevMask = It.second; + It.second &= ~S.LaneMask; + CurPressure.inc(It.first, PrevMask, It.second, *MRI); + } + } + } else if (!LI.liveAt(SI)) { + auto PrevMask = It.second; + It.second = LaneBitmask::getNone(); + CurPressure.inc(It.first, PrevMask, It.second, *MRI); + } + } +} + +void GCNDownwardRPTracker::advanceTo(const MachineInstr &MI) { + LastTrackedMI = &MI; + + if (MI.isDebugValue()) + return; + + // Add new registers or mask bits. + for (const auto &MO : MI.defs()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + auto &LiveMask = LiveRegs[Reg]; + auto PrevMask = LiveMask; + LiveMask |= getDefRegMask(MO); + CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); + } + + MaxPressure = max(MaxPressure, CurPressure); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, Index: lib/Target/AMDGPU/GCNSchedStrategy.h =================================================================== --- lib/Target/AMDGPU/GCNSchedStrategy.h +++ lib/Target/AMDGPU/GCNSchedStrategy.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H +#include "GCNRegPressure.h" #include "llvm/CodeGen/MachineScheduler.h" namespace llvm { @@ -79,16 +80,13 @@ MachineBasicBlock::iterator>, 32> Regions; // Region live-ins. - DenseMap LiveIns; - - // Number of live-ins to the current region, first SGPR then VGPR. - std::pair LiveInPressure; + GCNRPTracker::LiveRegSet LiveIns; // Collect current region live-ins. void discoverLiveIns(); - // Return current region pressure. First value is SGPR number, second is VGPR. - std::pair getRealRegPressure() const; + // Return current region pressure. + GCNRegPressure getRealRegPressure() const; public: GCNScheduleDAGMILive(MachineSchedContext *C, Index: lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -327,11 +327,15 @@ for (auto &I : *this) Unsched.push_back(&I); - std::pair PressureBefore; + GCNRegPressure PressureBefore; if (LIS) { - DEBUG(dbgs() << "Pressure before scheduling:\n"); discoverLiveIns(); PressureBefore = getRealRegPressure(); + + DEBUG(dbgs() << "Pressure before scheduling:\nSGPR = " + << PressureBefore.getSGPRNum() + << "\nVGPR = " << PressureBefore.getVGPRNum() << '\n'); + } ScheduleDAGMILive::schedule(); @@ -343,19 +347,23 @@ // Check the results of scheduling. GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; - DEBUG(dbgs() << "Pressure after scheduling:\n"); auto PressureAfter = getRealRegPressure(); + + DEBUG(dbgs() << "Pressure after scheduling:\nSGPR = " + << PressureAfter.getSGPRNum() + << "\nVGPR = " << PressureAfter.getVGPRNum() << '\n'); + LiveIns.clear(); - if (PressureAfter.first <= S.SGPRCriticalLimit && - PressureAfter.second <= S.VGPRCriticalLimit) { + if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && + PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) { DEBUG(dbgs() << "Pressure in desired limits, done.\n"); return; } - unsigned WavesAfter = getMaxWaves(PressureAfter.first, - PressureAfter.second, MF); - unsigned WavesBefore = getMaxWaves(PressureBefore.first, - PressureBefore.second, MF); + unsigned WavesAfter = getMaxWaves(PressureAfter.getSGPRNum(), + PressureAfter.getVGPRNum(), MF); + unsigned WavesBefore = getMaxWaves(PressureBefore.getSGPRNum(), + PressureBefore.getVGPRNum(), MF); DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore << ", after " << WavesAfter << ".\n"); @@ -404,112 +412,35 @@ placeDebugValues(); } -static inline void setMask(const MachineRegisterInfo &MRI, - const SIRegisterInfo *SRI, unsigned Reg, - LaneBitmask &PrevMask, LaneBitmask NewMask, - unsigned &SGPRs, unsigned &VGPRs) { - int NewRegs = countPopulation(NewMask.getAsInteger()) - - countPopulation(PrevMask.getAsInteger()); - if (SRI->isSGPRReg(MRI, Reg)) - SGPRs += NewRegs; - if (SRI->isVGPR(MRI, Reg)) - VGPRs += NewRegs; - assert ((int)SGPRs >= 0 && (int)VGPRs >= 0); - PrevMask = NewMask; -} - void GCNScheduleDAGMILive::discoverLiveIns() { - unsigned SGPRs = 0; - unsigned VGPRs = 0; - - auto I = begin(); - I = skipDebugInstructionsForward(I, I->getParent()->end()); - const SIRegisterInfo *SRI = static_cast(TRI); - SlotIndex SI = LIS->getInstructionIndex(*I).getBaseIndex(); - assert (SI.isValid()); - - DEBUG(dbgs() << "Region live-ins:"); - for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) - continue; - const LiveInterval &LI = LIS->getInterval(Reg); - LaneBitmask LaneMask = LaneBitmask::getNone(); - if (LI.hasSubRanges()) { - for (const auto &S : LI.subranges()) - if (S.liveAt(SI)) - LaneMask |= S.LaneMask; - } else if (LI.liveAt(SI)) { - LaneMask = MRI.getMaxLaneMaskForVReg(Reg); - } - - if (LaneMask.any()) { - setMask(MRI, SRI, Reg, LiveIns[Reg], LaneMask, SGPRs, VGPRs); - - DEBUG(dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':' - << PrintLaneMask(LiveIns[Reg])); - } - } - - LiveInPressure = std::make_pair(SGPRs, VGPRs); - - DEBUG(dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs - << "\nVGPR = " << VGPRs << '\n'); + GCNDownwardRPTracker RPTracker(*LIS); + RPTracker.reset(*begin()); + + LiveIns = RPTracker.moveLiveRegs(); + + DEBUG(GCNRegPressure LiveInPressure = RPTracker.moveMaxPressure(); + const SIRegisterInfo *SRI = static_cast(TRI); + dbgs() << "Region live-ins:"; + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + auto It = LiveIns.find(Reg); + if (It != LiveIns.end()) + dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':' + << PrintLaneMask(It->second); + } + dbgs() << "\nLive-in pressure:\nSGPR = " + << LiveInPressure.getSGPRNum() + << "\nVGPR = " << LiveInPressure.getVGPRNum() << '\n'); } -std::pair -GCNScheduleDAGMILive::getRealRegPressure() const { - unsigned SGPRs, MaxSGPRs, VGPRs, MaxVGPRs; - SGPRs = MaxSGPRs = LiveInPressure.first; - VGPRs = MaxVGPRs = LiveInPressure.second; +GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { + GCNDownwardRPTracker RPTracker(*LIS); + RPTracker.reset(*begin(), &LiveIns); - const SIRegisterInfo *SRI = static_cast(TRI); - DenseMap LiveRegs(LiveIns); - - for (const MachineInstr &MI : *this) { - if (MI.isDebugValue()) - continue; - SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex(); - assert (SI.isValid()); - - // Remove dead registers or mask bits. - for (auto &It : LiveRegs) { - if (It.second.none()) - continue; - const LiveInterval &LI = LIS->getInterval(It.first); - if (LI.hasSubRanges()) { - for (const auto &S : LI.subranges()) - if (!S.liveAt(SI)) - setMask(MRI, SRI, It.first, It.second, It.second & ~S.LaneMask, - SGPRs, VGPRs); - } else if (!LI.liveAt(SI)) { - setMask(MRI, SRI, It.first, It.second, LaneBitmask::getNone(), - SGPRs, VGPRs); - } - } - - // Add new registers or mask bits. - for (const auto &MO : MI.defs()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - unsigned SubRegIdx = MO.getSubReg(); - LaneBitmask LaneMask = SubRegIdx != 0 - ? TRI->getSubRegIndexLaneMask(SubRegIdx) - : MRI.getMaxLaneMaskForVReg(Reg); - LaneBitmask &LM = LiveRegs[Reg]; - setMask(MRI, SRI, Reg, LM, LM | LaneMask, SGPRs, VGPRs); - } - MaxSGPRs = std::max(MaxSGPRs, SGPRs); - MaxVGPRs = std::max(MaxVGPRs, VGPRs); - } - - DEBUG(dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs - << "\nVGPR = " << MaxVGPRs << '\n'); + for (auto &I : *this) + RPTracker.advance(I); - return std::make_pair(MaxSGPRs, MaxVGPRs); + return RPTracker.moveMaxPressure(); } void GCNScheduleDAGMILive::finalizeSchedule() {