Index: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h +++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -18,7 +18,9 @@ namespace llvm { +class SIMachineFunctionInfo; class SIRegisterInfo; +class SISubtarget; /// This is a minimal scheduler strategy. The main difference between this /// and the GenericScheduler is that GCNSchedStrategy uses different @@ -43,6 +45,10 @@ unsigned SGPRCriticalLimit; unsigned VGPRCriticalLimit; + unsigned TargetOccupancy; + + MachineFunction *MF; + public: GCNMaxOccupancySchedStrategy(const MachineSchedContext *C); @@ -53,6 +59,23 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive { + const SISubtarget &ST; + + const SIMachineFunctionInfo &MFI; + + // Occupancy target at the beginning of function scheduling cycle. + unsigned StartingOccupancy; + + // Minimal real occupancy recorded for the function. + unsigned MinOccupancy; + + // Scheduling stage number. + unsigned Stage; + + // Vector of regions recorded for later rescheduling + SmallVector, 32> Regions; + // Region live-ins. 
DenseMap LiveIns; @@ -67,8 +90,12 @@ public: GCNScheduleDAGMILive(MachineSchedContext *C, - std::unique_ptr S) : - ScheduleDAGMILive(C, std::move(S)) {} + std::unique_ptr S); + + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) override; void schedule() override; Index: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -26,7 +26,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C) : - GenericScheduler(C) { } + GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { } static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, const MachineFunction &MF) { @@ -45,6 +45,12 @@ const SIRegisterInfo *SRI = static_cast(TRI); + if (MF != &DAG->MF) + TargetOccupancy = 0; + MF = &DAG->MF; + + const SISubtarget &ST = MF->getSubtarget(); + // FIXME: This is also necessary, because some passes that run after // scheduling and before regalloc increase register pressure. 
const int ErrorMargin = 3; @@ -53,10 +59,18 @@ ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin; VGPRExcessLimit = Context->RegClassInfo ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin; - SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, - SRI->getSGPRPressureSet()) - ErrorMargin; - VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, - SRI->getVGPRPressureSet()) - ErrorMargin; + if (TargetOccupancy) { + SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true); + VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy); + } else { + SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, + SRI->getSGPRPressureSet()); + VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, + SRI->getVGPRPressureSet()); + } + + SGPRCriticalLimit -= ErrorMargin; + VGPRCriticalLimit -= ErrorMargin; } void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, @@ -309,6 +323,28 @@ return SU; } +GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, + std::unique_ptr S) : + ScheduleDAGMILive(C, std::move(S)), + ST(MF.getSubtarget()), + MFI(*MF.getInfo()), + StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(), + *MF.getFunction())), + MinOccupancy(StartingOccupancy), Stage(0) { + + DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); +} + +void GCNScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) { + ScheduleDAGMILive::enterRegion(bb, begin, end, regioninstrs); + + if (Stage == 0) + Regions.push_back(std::make_pair(begin, end)); +} + void GCNScheduleDAGMILive::schedule() { std::vector Unsched; Unsched.reserve(NumRegionInstrs); @@ -344,6 +380,15 @@ DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore << ", after " << WavesAfter << ".\n"); + // We could not keep current target occupancy because of the just scheduled + // region. 
Record new occupancy for next scheduling cycle. + unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); + if (NewOccupancy < MinOccupancy) { + MinOccupancy = NewOccupancy; + DEBUG(dbgs() << "Occupancy lowered for the function to " + << MinOccupancy << ".\n"); + } + if (WavesAfter >= WavesBefore) return; @@ -485,5 +530,52 @@ } void GCNScheduleDAGMILive::finalizeSchedule() { + // Retry function scheduling if we found resulting occupancy and it is + // lower than used for first pass scheduling. This will give more freedom + // to schedule low register pressure blocks. + // Code is partially copied from MachineSchedulerBase::scheduleRegions(). + + if (!LIS || StartingOccupancy <= MinOccupancy) + return; + + DEBUG(dbgs() << "Retrying function scheduling with lowest recorded occupancy " + << MinOccupancy << ".\n"); + + Stage++; + GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; + S.TargetOccupancy = MinOccupancy; + + MachineBasicBlock *MBB = nullptr; + for (auto Region : Regions) { + RegionBegin = Region.first; + RegionEnd = Region.second; + + if (RegionBegin->getParent() != MBB) { + if (MBB) finishBlock(); + MBB = RegionBegin->getParent(); + startBlock(MBB); + } + + unsigned NumRegionInstrs = std::distance(begin(), end()); + enterRegion(MBB, begin(), end(), NumRegionInstrs); + + // Skip empty scheduling regions (0 or 1 schedulable instructions). + if (begin() == end() || begin() == std::prev(end())) { + exitRegion(); + continue; + } + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(dbgs() << MF.getName() + << ":BB#" << MBB->getNumber() << " " << MBB->getName() + << "\n From: " << *begin() << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); + + schedule(); + + exitRegion(); + } + finishBlock(); LiveIns.shrink_and_clear(); }