Index: llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.h
@@ -190,6 +190,50 @@
                                      const LiveIntervals &LIS,
                                      const MachineRegisterInfo &MRI);
 
+/// creates a map MachineInstr -> LiveRegSet
+/// R - range of iterators on instructions
+/// After - upon entry or exit of every instruction
+/// Note: there is no entry in the map for instructions with empty live reg set
+/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
+template <typename Range>
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
+  std::vector<SlotIndex> Indexes;
+  Indexes.reserve(std::distance(R.begin(), R.end()));
+  auto &SII = *LIS.getSlotIndexes();
+  for (MachineInstr *I : R) {
+    auto SI = SII.getInstructionIndex(*I);
+    Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
+  }
+  std::sort(Indexes.begin(), Indexes.end());
+
+  auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo();
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
+  SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
+  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+    auto Reg = TargetRegisterInfo::index2VirtReg(I);
+    if (!LIS.hasInterval(Reg))
+      continue;
+    auto &LI = LIS.getInterval(Reg);
+    LiveIdxs.clear();
+    if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
+      continue;
+    if (!LI.hasSubRanges()) {
+      for (auto SI : LiveIdxs)
+        LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
+          MRI.getMaxLaneMaskForVReg(Reg);
+    } else
+      for (const auto &S : LI.subranges()) {
+        // constrain search for subranges by indexes live at main range
+        SRLiveIdxs.clear();
+        S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
+        for (auto SI : SRLiveIdxs)
+          LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask;
+      }
+  }
+  return LiveRegMap;
+}
+
 inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
                                                  const LiveIntervals &LIS) {
   return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
@@ -211,6 +255,9 @@
   return Res;
 }
 
+bool isEqual(const GCNRPTracker::LiveRegSet &S1,
+             const GCNRPTracker::LiveRegSet &S2);
+
 void printLivesAt(SlotIndex SI,
                   const LiveIntervals &LIS,
                   const MachineRegisterInfo &MRI);
Index: llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -63,9 +63,10 @@
   }
   if (!Num) dbgs() << "  <none>\n";
 }
+#endif
 
-static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
-                    const GCNRPTracker::LiveRegSet &S2) {
+bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
+                   const GCNRPTracker::LiveRegSet &S2) {
   if (S1.size() != S2.size())
     return false;
 
@@ -76,7 +77,7 @@
   }
   return true;
 }
-#endif
+
 
 ///////////////////////////////////////////////////////////////////////////////
 // GCNRegPressure
Index: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -90,6 +90,9 @@
   // Temporary basic block live-in cache.
   DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
 
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+
   // Return current region pressure.
   GCNRegPressure getRealRegPressure() const;
 
Index: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -445,8 +445,12 @@
     RPTracker.reset(*MBB->begin(), &LiveIn);
     MBBLiveIns.erase(LiveInIt);
   } else {
-    I = Regions[CurRegion].first;
-    RPTracker.reset(*I);
+    auto &Rgn = Regions[CurRegion];
+    I = Rgn.first;
+    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
+    auto LRS = BBLiveInMap.lookup(NonDbgMI);
+    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
+    RPTracker.reset(*I, &LRS);
   }
 
   for ( ; ; ) {
@@ -477,6 +481,23 @@
   }
 }
 
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getBBLiveInMap() const {
+  assert(!Regions.empty());
+  std::vector<MachineInstr *> BBStarters;
+  BBStarters.reserve(Regions.size());
+  auto I = Regions.rbegin(), E = Regions.rend();
+  auto *BB = I->first->getParent();
+  do {
+    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
+    BBStarters.push_back(MI);
+    do {
+      ++I;
+    } while (I != E && I->first->getParent() == BB);
+  } while (I != E);
+  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
+}
+
 void GCNScheduleDAGMILive::finalizeSchedule() {
   GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
@@ -484,6 +505,9 @@
   LiveIns.resize(Regions.size());
   Pressure.resize(Regions.size());
 
+  if (!Regions.empty())
+    BBLiveInMap = getBBLiveInMap();
+
   do {
     Stage++;
     RegionIdx = 0;