diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -50,9 +50,9 @@ VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy); } else { SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, - SRI->getSGPRPressureSet()); + AMDGPU::RegisterPressureSets::SReg_32); VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, - SRI->getVGPRPressureSet()); + AMDGPU::RegisterPressureSets::VGPR_32); } SGPRCriticalLimit -= ErrorMargin; @@ -83,8 +83,8 @@ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); } - unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; - unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; + unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; + unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; // If two instructions increase the pressure of different register sets // by the same amount, the generic scheduler will prefer to schedule the @@ -109,12 +109,12 @@ // marked as RegExcess in tryCandidate() when they are compared with // instructions that increase the register pressure. 
if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { - Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); + Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32); Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); } if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { - Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet()); + Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32); Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit); } @@ -128,10 +128,12 @@ if (SGPRDelta >= 0 || VGPRDelta >= 0) { if (SGPRDelta > VGPRDelta) { - Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet()); + Cand.RPDelta.CriticalMax = + PressureChange(AMDGPU::RegisterPressureSets::SReg_32); Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); } else { - Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet()); + Cand.RPDelta.CriticalMax = + PressureChange(AMDGPU::RegisterPressureSets::VGPR_32); Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); } } @@ -145,8 +147,8 @@ SchedCandidate &Cand) { const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); - unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()]; - unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()]; + unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; + unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h @@ -435,9 +435,6 @@ std::vector<unsigned> ScheduledSUnits; std::vector<unsigned> ScheduledSUnitsInv; - unsigned VGPRSetID; - unsigned SGPRSetID; - public: SIScheduleDAGMI(MachineSchedContext *C); @@ -484,9 +481,6 @@ return OutRegs; }; - unsigned getVGPRSetID() const { 
return VGPRSetID; } - unsigned getSGPRSetID() const { return SGPRSetID; } - private: void topologicalSort(); // After scheduling is done, improve low latency placements. diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -269,8 +269,8 @@ // Predict register usage after this instruction. TryCand.SU = SU; TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure); - TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()]; - TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()]; + TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32]; + TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32]; TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum]; TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum]; TryCand.HasLowLatencyNonWaitedParent = @@ -595,10 +595,12 @@ } if (Scheduled) { - dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' ' - << LiveInPressure[DAG->getVGPRSetID()] << '\n'; - dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' ' - << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n"; + dbgs() << "LiveInPressure " + << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' ' + << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n'; + dbgs() << "LiveOutPressure " + << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' ' + << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n"; dbgs() << "LiveIns:\n"; for (unsigned Reg : LiveInRegs) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; @@ -1637,7 +1639,7 @@ TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock(); TryCand.VGPRUsageDiff = checkRegUsageImpact(TryCand.Block->getInRegs(), - TryCand.Block->getOutRegs())[DAG->getVGPRSetID()]; + TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32]; TryCand.NumSuccessors = 
TryCand.Block->getSuccs().size(); TryCand.NumHighLatencySuccessors = TryCand.Block->getNumHighLatencySuccessors(); @@ -1796,9 +1798,6 @@ ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) { SITII = static_cast<const SIInstrInfo*>(TII); SITRI = static_cast<const SIRegisterInfo*>(TRI); - - VGPRSetID = SITRI->getVGPRPressureSet(); - SGPRSetID = SITRI->getSGPRPressureSet(); } SIScheduleDAGMI::~SIScheduleDAGMI() = default; @@ -1909,9 +1908,9 @@ continue; PSetIterator PSetI = MRI.getPressureSets(Reg); for (; PSetI.isValid(); ++PSetI) { - if (*PSetI == VGPRSetID) + if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32) VgprUsage += PSetI.getWeight(); - else if (*PSetI == SGPRSetID) + else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32) SgprUsage += PSetI.getWeight(); } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -30,19 +30,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { private: const GCNSubtarget &ST; - unsigned SGPRSetID; - unsigned VGPRSetID; - unsigned AGPRSetID; - BitVector SGPRPressureSets; - BitVector VGPRPressureSets; - BitVector AGPRPressureSets; bool SpillSGPRToVGPR; bool isWave32; void reserveRegisterTuples(BitVector &, unsigned Reg) const; - void classifyPressureSet(unsigned PSetID, unsigned Reg, - BitVector &PressureSets) const; public: SIRegisterInfo(const GCNSubtarget &ST); @@ -207,10 +199,6 @@ const TargetRegisterClass *RC, const MachineFunction &MF) const; - unsigned getSGPRPressureSet() const { return SGPRSetID; }; - unsigned getVGPRPressureSet() const { return VGPRSetID; }; - unsigned getAGPRPressureSet() const { return AGPRSetID; }; - const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const; bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const; @@ -224,19 +212,6 @@ return !isSGPRClass(RC); } - bool isSGPRPressureSet(unsigned SetID) const { - return SGPRPressureSets.test(SetID) && 
!VGPRPressureSets.test(SetID) && - !AGPRPressureSets.test(SetID); - } - bool isVGPRPressureSet(unsigned SetID) const { - return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) && - !AGPRPressureSets.test(SetID); - } - bool isAGPRPressureSet(unsigned SetID) const { - return AGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) && - !VGPRPressureSets.test(SetID); - } - ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -41,53 +41,8 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterInfo(0), ST(ST), - SGPRPressureSets(getNumRegPressureSets()), - VGPRPressureSets(getNumRegPressureSets()), - AGPRPressureSets(getNumRegPressureSets()), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { - unsigned NumRegPressureSets = getNumRegPressureSets(); - - SGPRSetID = NumRegPressureSets; - VGPRSetID = NumRegPressureSets; - AGPRSetID = NumRegPressureSets; - - for (unsigned i = 0; i < NumRegPressureSets; ++i) { - classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets); - classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets); - classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets); - } - - // Determine the number of reg units for each pressure set. 
- std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0); - for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) { - const int *PSets = getRegUnitPressureSets(i); - for (unsigned j = 0; PSets[j] != -1; ++j) { - ++PressureSetRegUnits[PSets[j]]; - } - } - - unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0; - for (unsigned i = 0; i < NumRegPressureSets; ++i) { - if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) { - VGPRSetID = i; - VGPRMax = PressureSetRegUnits[i]; - continue; - } - if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) { - SGPRSetID = i; - SGPRMax = PressureSetRegUnits[i]; - } - if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) { - AGPRSetID = i; - AGPRMax = PressureSetRegUnits[i]; - continue; - } - } - - assert(SGPRSetID < NumRegPressureSets && - VGPRSetID < NumRegPressureSets && - AGPRSetID < NumRegPressureSets); } void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, @@ -148,25 +103,6 @@ return CSR_AMDGPU_AllAllocatableSRegs_RegMask; } -static bool hasPressureSet(const int *PSets, unsigned PSetID) { - for (unsigned i = 0; PSets[i] != -1; ++i) { - if (PSets[i] == (int)PSetID) - return true; - } - return false; -} - -void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg, - BitVector &PressureSets) const { - for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) { - const int *PSets = getRegUnitPressureSets(*U); - if (hasPressureSet(PSets, PSetID)) { - PressureSets.set(PSetID); - break; - } - } -} - // FIXME: TableGen should generate something to make this manageable for all // register classes. At a minimum we could use the opposite of // composeSubRegIndices and go up from the base 32-bit subreg. 
@@ -1875,11 +1811,12 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const { - if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet()) + if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 || + Idx == AMDGPU::RegisterPressureSets::AGPR_32) return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, const_cast<MachineFunction &>(MF)); - if (Idx == getSGPRPressureSet()) + if (Idx == AMDGPU::RegisterPressureSets::SReg_32) return getRegPressureLimit(&AMDGPU::SGPR_32RegClass, const_cast<MachineFunction &>(MF));