Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -54,6 +54,10 @@
   // before a region scheduling to know if the region had such clusters.
   bool HasClusteredNodes;
 
+  // schedule() has seen excess register pressure and had to track
+  // register pressure for actual scheduling heuristics.
+  bool HasExcessPressure;
+
   MachineFunction *MF;
 
 public:
@@ -100,6 +104,12 @@
   // or we generally desire to reschedule it.
   BitVector RescheduleRegions;
 
+  // Record regions which use clustered loads/stores.
+  BitVector RegionsWithClusters;
+
+  // Record regions with high register pressure.
+  BitVector RegionsWithHighRP;
+
   // Region live-in cache.
   SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
 
Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -21,7 +21,7 @@
 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
     const MachineSchedContext *C) :
     GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
-    MF(nullptr) { }
+    HasExcessPressure(false), MF(nullptr) { }
 
 void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
   GenericScheduler::initialize(DAG);
@@ -104,11 +104,13 @@
   // marked as RegExcess in tryCandidate() when they are compared with
   // instructions that increase the register pressure.
   if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
+    HasExcessPressure = true;
     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
     Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
   }
 
   if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
+    HasExcessPressure = true;
     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
     Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
   }
@@ -122,6 +124,7 @@
   int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
 
   if (SGPRDelta >= 0 || VGPRDelta >= 0) {
+    HasExcessPressure = true;
     if (SGPRDelta > VGPRDelta) {
       Cand.RPDelta.CriticalMax =
         PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
@@ -331,12 +334,17 @@
   }
 
   GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
-  // Set HasClusteredNodes to true for late stages where we are not interested
-  // in it anymore. That way pickNode() will not scan SDep's when not needed.
-  S.HasClusteredNodes = Stage >= UnclusteredReschedule;
+  // Set HasClusteredNodes to true for late stages where we have already
+  // collected it. That way pickNode() will not scan SDep's when not needed.
+  S.HasClusteredNodes = Stage > InitialSchedule;
+  S.HasExcessPressure = false;
   ScheduleDAGMILive::schedule();
   Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
   RescheduleRegions[RegionIdx] = false;
+  if (Stage == InitialSchedule && S.HasClusteredNodes)
+    RegionsWithClusters[RegionIdx] = true;
+  if (S.HasExcessPressure)
+    RegionsWithHighRP[RegionIdx] = true;
 
   if (!LIS)
     return;
@@ -381,8 +389,10 @@
   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
   if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
       PressureAfter.getAGPRNum() > MaxVGPRs ||
-      PressureAfter.getSGPRNum() > MaxSGPRs)
+      PressureAfter.getSGPRNum() > MaxSGPRs) {
     RescheduleRegions[RegionIdx] = true;
+    RegionsWithHighRP[RegionIdx] = true;
+  }
 
   if (WavesAfter >= MinOccupancy) {
     if (Stage == UnclusteredReschedule &&
@@ -392,7 +402,8 @@
         PressureAfter.less(ST, PressureBefore) ||
         !RescheduleRegions[RegionIdx]) {
       Pressure[RegionIdx] = PressureAfter;
-      if (!S.HasClusteredNodes && (Stage + 1) == UnclusteredReschedule)
+      if (!RegionsWithClusters[RegionIdx] &&
+          (Stage + 1) == UnclusteredReschedule)
         RescheduleRegions[RegionIdx] = false;
       return;
     } else {
@@ -401,7 +412,7 @@
   }
 
   LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
-  RescheduleRegions[RegionIdx] = S.HasClusteredNodes ||
+  RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
                                  (Stage + 1) != UnclusteredReschedule;
   RegionEnd = RegionBegin;
   for (MachineInstr *MI : Unsched) {
@@ -535,7 +546,11 @@
   LiveIns.resize(Regions.size());
   Pressure.resize(Regions.size());
   RescheduleRegions.resize(Regions.size());
+  RegionsWithClusters.resize(Regions.size());
+  RegionsWithHighRP.resize(Regions.size());
   RescheduleRegions.set();
+  RegionsWithClusters.reset();
+  RegionsWithHighRP.reset();
 
   if (!Regions.empty())
     BBLiveInMap = getBBLiveInMap();
@@ -580,7 +595,10 @@
     SavedMutations.swap(Mutations);
 
   for (auto Region : Regions) {
-    if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
+    if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
+        (Stage == ClusteredLowOccupancyReschedule &&
+         !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
+      ++RegionIdx;
       continue;
     }
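
The gist of the patch is a two-phase pattern: facts observed while a region is scheduled for the first time (clustered memory operations, excess register pressure) are latched into per-region flags, and later scheduling stages consult those flags to skip regions that cannot benefit from rescheduling. Below is a minimal, self-contained C++ sketch of that pattern, not the LLVM implementation: RegionFlags and shouldSkip are hypothetical names, std::vector<bool> stands in for llvm::BitVector, and the Stage values mirror the stage names used in the diff.

#include <cstddef>
#include <cstdio>
#include <vector>

enum Stage {
  InitialSchedule,
  UnclusteredReschedule,
  ClusteredLowOccupancyReschedule
};

struct RegionFlags {
  std::vector<bool> Reschedule;  // stand-in for RescheduleRegions
  std::vector<bool> HasClusters; // stand-in for RegionsWithClusters
  std::vector<bool> HighRP;      // stand-in for RegionsWithHighRP

  explicit RegionFlags(std::size_t NumRegions)
      : Reschedule(NumRegions, true),   // RescheduleRegions.set()
        HasClusters(NumRegions, false), // RegionsWithClusters.reset()
        HighRP(NumRegions, false) {}    // RegionsWithHighRP.reset()

  // Mirrors the skip condition added to the per-region loop: a region is
  // revisited by a stage only if its recorded flags say the stage can help.
  bool shouldSkip(Stage S, std::size_t Idx) const {
    if (S == UnclusteredReschedule && !Reschedule[Idx])
      return true;
    if (S == ClusteredLowOccupancyReschedule && !HasClusters[Idx] &&
        !HighRP[Idx])
      return true;
    return false;
  }
};

int main() {
  // Suppose the initial schedule saw clusters only in region 1 and high
  // register pressure only in region 2; region 0 is then skipped by the
  // clustered-low-occupancy stage.
  RegionFlags F(3);
  F.HasClusters = {false, true, false};
  F.HighRP = {false, false, true};
  for (std::size_t I = 0; I < 3; ++I)
    std::printf("region %zu: skip clustered-low-occupancy stage? %s\n", I,
                F.shouldSkip(ClusteredLowOccupancyReschedule, I) ? "yes" : "no");
  return 0;
}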