Index: lib/Target/AMDGPU/SIMachineScheduler.h
===================================================================
--- lib/Target/AMDGPU/SIMachineScheduler.h
+++ lib/Target/AMDGPU/SIMachineScheduler.h
@@ -333,6 +333,7 @@
   std::vector<SIScheduleBlock*> BlocksScheduled;
   unsigned NumBlockScheduled;
   std::vector<SIScheduleBlock*> ReadyBlocks;
+  std::set<SIScheduleBlock*> CurrentPathRegUsage;

   unsigned VregCurrentUsage;
   unsigned SregCurrentUsage;
@@ -402,6 +403,10 @@
                                        std::set<unsigned> &OutRegs);

   void schedule();
+  std::set<SIScheduleBlock*> findPathRegUsage(int SearchDepthLimit,
+                                              int VGPRDiffGoal,
+                                              int SGPRDiffGoal,
+                                              bool PriorityVGPR);
 };

 struct SIScheduleBlockResult {
Index: lib/Target/AMDGPU/SIMachineScheduler.cpp
===================================================================
--- lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -17,6 +17,9 @@
 #include "SIMachineScheduler.h"
 #include "SIRegisterInfo.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -1570,10 +1573,42 @@
     dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
   );

+  // 200 and 70 are arbitrary thresholds.
+  // TODO: find a better way to set them.
+  // If finding a path of depth 6 fails, try to find a path
+  // of depth 12.
+  if (CurrentPathRegUsage.empty()) {
+    if (VregCurrentUsage > 200) {
+      CurrentPathRegUsage = findPathRegUsage(6, 0, INT_MAX, true);
+      if (CurrentPathRegUsage.empty())
+        CurrentPathRegUsage = findPathRegUsage(12, 0, INT_MAX, true);
+    } else if (SregCurrentUsage > 70) {
+      CurrentPathRegUsage = findPathRegUsage(6, INT_MAX, 0, false);
+      if (CurrentPathRegUsage.empty())
+        CurrentPathRegUsage = findPathRegUsage(12, INT_MAX, 0, false);
+    }
+  }
+
+  DEBUG(
+    if (!CurrentPathRegUsage.empty()) {
+      dbgs() << "Restricting search among: ";
+      for (SIScheduleBlock* Block : ReadyBlocks) {
+        if (!CurrentPathRegUsage.count(Block))
+          continue;
+        dbgs() << Block->getID() << ' ';
+      }
+      dbgs() << '\n';
+    }
+  );
+
   Cand.Block = nullptr;
   for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
        E = ReadyBlocks.end(); I != E; ++I) {
     SIBlockSchedCandidate TryCand;
+
+    if (!CurrentPathRegUsage.empty() &&
+        CurrentPathRegUsage.find(*I) == CurrentPathRegUsage.end())
+      continue;
+
     TryCand.Block = *I;
     TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
     TryCand.VGPRUsageDiff =
@@ -1617,6 +1652,8 @@

   Block = Cand.Block;
   ReadyBlocks.erase(Best);
+  if (!CurrentPathRegUsage.empty())
+    CurrentPathRegUsage.erase(Block);
   return Block;
 }

@@ -1711,6 +1748,548 @@
   return DiffSetPressure;
 }

+// Strategy to reduce register pressure:
+// Idea: Reducing register pressure is hard. Heuristics based on
+// scores have failed (scores estimating each instruction's "potential"
+// to release registers).
+// Instead of using heuristics, try to find a path with a reduced number
+// of instructions, such that at the end of the path the number of live
+// registers is reduced.
+//
+// Algorithm:
+// For each alive register, or register not yet produced:
+// -> if we need more than k instructions to consume the register, ignore it.
+// -> else compute the minimal set of registers that would be produced to
+//    schedule all the instructions needed to consume the register, idem
+//    for the set of registers that would be consumed (separating currently
+//    alive registers from intermediate registers).
+//
+// We then pick the register such that the path to consume it releases the
+// most registers.
+//
+// A way to get better results would then be to try combinations that
+// directly give a better consumed/produced ratio.
+//
+// Another way to improve would be to pick, among all the possible choices,
+// the best latency-friendly path.
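+//
+// For example: say VGPR pressure is too high and the live register %a is
+// only read by blocks B3 and B5 (hypothetical names). The candidate path
+// for %a is {B3, B5} plus their not-yet-scheduled predecessors; if
+// scheduling that set consumes %a and %b while leaving only %c alive, the
+// path gets a negative (VGPR, SGPR) diff and is a good candidate to
+// schedule next.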
+
+struct SIBlockInfo {
+  // All the Blocks that have to be scheduled first, plus the Block itself.
+  SmallPtrSet<SIScheduleBlock*, 16> Dependencies;
+  // The registers that were live at the start of the search and that
+  // scheduling this block and its dependencies consumed.
+  DenseSet<unsigned> ConsumedRegisters;
+  // Idem for the intermediate registers (that were produced
+  // at some point, and then consumed).
+  DenseSet<unsigned> ProducedConsumedRegisters;
+  // Idem for registers that were produced at some point, but
+  // are still alive.
+  DenseSet<unsigned> ProducedRegisters;
+  // Registers that are read by this block specifically (identical to
+  // LiveInRegs, but in the "unique reg identifier" space).
+  DenseSet<unsigned> BlockInRegs;
+  // unique identifier -> which blocks in the Dependencies consume this
+  // register. We only need to count how many times a given register is
+  // consumed, but we use a set to enable unions when merging several
+  // branches. Registers that are fully consumed are removed from the map.
+  DenseMap<unsigned, SmallPtrSet<SIScheduleBlock*, 16>> RegisterConsumers;
+  SIBlockInfo() : Dependencies(), ConsumedRegisters(),
+                  ProducedConsumedRegisters(), ProducedRegisters(),
+                  BlockInRegs(), RegisterConsumers() {}
+};
+
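+// The block-level accounting is then aggregated per register: for each
+// consumable register, the SIMIRegisterInfo below merges the SIBlockInfo
+// of every block that reads the register, which gives the full set of
+// blocks (and the register diff) needed to release it.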
+struct SIMIRegisterInfo {
+  // The minimal set of Blocks to schedule to release this register.
+  SmallPtrSet<SIScheduleBlock*, 16> Dependencies;
+  // The livein registers that are consumed by scheduling the Dependencies.
+  DenseSet<unsigned> ConsumedRegisters;
+  // The registers that are produced, then consumed, by scheduling the
+  // Dependencies.
+  DenseSet<unsigned> ProducedConsumedRegisters;
+  // The registers that are produced by scheduling the Dependencies, but
+  // are still alive.
+  DenseSet<unsigned> ProducedRegisters;
+  // Idem as for Blocks.
+  DenseMap<unsigned, SmallPtrSet<SIScheduleBlock*, 16>> RegisterConsumers;
+  SIMIRegisterInfo() : Dependencies(), ConsumedRegisters(),
+                       ProducedConsumedRegisters(), ProducedRegisters(),
+                       RegisterConsumers() {}
+#ifndef NDEBUG
+  void
+  printDebug(SIScheduleDAGMI *DAG,
+             DenseMap<unsigned, unsigned> &IdentifierToReg) {
+    dbgs() << "Block list: ";
+    for (SIScheduleBlock *Block : Dependencies)
+      dbgs() << Block->getID() << " ";
+    dbgs() << '\n';
+    dbgs() << "Consumed registers: ";
+    for (unsigned Reg : ConsumedRegisters) {
+      unsigned RealReg = IdentifierToReg[Reg];
+      int DiffV = 0;
+      int DiffS = 0;
+      PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg);
+      for (; PSetI.isValid(); ++PSetI) {
+        if (*PSetI == DAG->getVGPRSetID())
+          DiffV -= PSetI.getWeight();
+        if (*PSetI == DAG->getSGPRSetID())
+          DiffS -= PSetI.getWeight();
+      }
+      dbgs() << PrintVRegOrUnit(RealReg, DAG->getTRI());
+      dbgs() << "(" << DiffV << ", " << DiffS << "), ";
+    }
+    dbgs() << '\n';
+    dbgs() << "Intermediate registers: ";
+    for (unsigned Reg : ProducedConsumedRegisters) {
+      unsigned RealReg = IdentifierToReg[Reg];
+      int DiffV = 0;
+      int DiffS = 0;
+      PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg);
+      for (; PSetI.isValid(); ++PSetI) {
+        if (*PSetI == DAG->getVGPRSetID())
+          DiffV += PSetI.getWeight();
+        if (*PSetI == DAG->getSGPRSetID())
+          DiffS += PSetI.getWeight();
+      }
+      dbgs() << PrintVRegOrUnit(RealReg, DAG->getTRI());
+      dbgs() << "(" << DiffV << ", " << DiffS << "), ";
+    }
+    dbgs() << '\n';
+    dbgs() << "Produced registers: ";
+    for (unsigned Reg : ProducedRegisters) {
+      unsigned RealReg = IdentifierToReg[Reg];
+      int DiffV = 0;
+      int DiffS = 0;
+      PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg);
+      for (; PSetI.isValid(); ++PSetI) {
+        if (*PSetI == DAG->getVGPRSetID())
+          DiffV += PSetI.getWeight();
+        if (*PSetI == DAG->getSGPRSetID())
+          DiffS += PSetI.getWeight();
+      }
+      dbgs() << PrintVRegOrUnit(RealReg, DAG->getTRI());
+      dbgs() << "(" << DiffV << ", " << DiffS << "), ";
+    }
+    dbgs() << '\n';
+  }
+#endif
+};
+
+std::set<SIScheduleBlock*>
+SIScheduleBlockScheduler::findPathRegUsage(int SearchDepthLimit,
+                                           int VGPRDiffGoal,
+                                           int SGPRDiffGoal,
+                                           bool PriorityVGPR) {
+  DenseSet<unsigned> LiveRegsInitId;
+  DenseMap<unsigned, unsigned> RegsConsumers;
+
+  DenseMap<unsigned, unsigned> CountUsersPerReg;
+  DenseSet<unsigned> ConsumableRegisters;
+
+  std::vector<unsigned> BlockNumPredsLeftCurrent = BlockNumPredsLeft;
+
+  // We want a unique identifier per register, but a register can be reused
+  // (input and output of a block). We thus use a mapping
+  // "unique reg identifier" -> register,
+  // and an opposite mapping register -> current associated identifier.
+  DenseMap<unsigned, unsigned> IdentifierToReg;
+  DenseMap<unsigned, unsigned> RegToIdentifier;
+  unsigned NextIdentifier = 0;
+
+  DenseMap<unsigned, SIBlockInfo> BlockInfos;
+  std::vector<SIScheduleBlock*> SchedulableBlocks = ReadyBlocks;
+  std::vector<SIScheduleBlock*> SchedulableBlockNextDepth;
+
+  DenseMap<unsigned, SIMIRegisterInfo> RegisterInfos;
+
+  SmallVector<unsigned, 8> Temp;
+
+  int BestDiffVGPR = INT_MAX;
+  int BestDiffSGPR = INT_MAX;
+
+  if (ReadyBlocks.empty())
+    return std::set<SIScheduleBlock*>();
+
+  DEBUG(dbgs() << "findPathRegUsage(" << SearchDepthLimit << ")\n");
+  DEBUG(dbgs() << "Initial Live regs:\n");
+
+  // Fill the info for the initial registers.
+  for (unsigned Reg : LiveRegs) {
+    // Ignore physical registers.
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    (void) LiveRegsInitId.insert(NextIdentifier);
+    assert(LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end());
+    assert(LiveRegsConsumers[Reg] > 0);
+    RegsConsumers[NextIdentifier] = LiveRegsConsumers[Reg];
+
+    IdentifierToReg[NextIdentifier] = Reg;
+    RegToIdentifier[Reg] = NextIdentifier;
+    DEBUG(dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << " (--> "
+                 << NextIdentifier << ")\n");
+    ++NextIdentifier;
+  }
+
+  // Fill BlockInfos.
+  while (SearchDepthLimit > 0 && !SchedulableBlocks.empty()) {
+    DEBUG(dbgs() << "Iterating... Remaining levels: " << SearchDepthLimit
+                 << '\n');
+    for (SIScheduleBlock* Block : SchedulableBlocks) {
+      SIBlockInfo &BlockInfo = BlockInfos[Block->getID()];
+
+      DEBUG(dbgs() << "Computing data for Block: " << Block->getID() << '\n');
+
+      for (SIScheduleBlock *Parent : Block->getPreds()) {
+        DenseMap<unsigned, SIBlockInfo>::iterator ParentBlockInfoPair =
+          BlockInfos.find(Parent->getID());
+        // We fill the data in a topologically sorted order, thus
+        // all parents are seen before the Block. However we don't
+        // fill any data if the Parent is already scheduled.
+        if (ParentBlockInfoPair != BlockInfos.end()) {
+          SIBlockInfo &ParentBlockInfo = ParentBlockInfoPair->second;
+          // Add the dependencies of the parents.
+          BlockInfo.Dependencies.insert(
+            ParentBlockInfo.Dependencies.begin(),
+            ParentBlockInfo.Dependencies.end());
+          // Idem for consumed and produced registers.
+          BlockInfo.ConsumedRegisters.insert(
+            ParentBlockInfo.ConsumedRegisters.begin(),
+            ParentBlockInfo.ConsumedRegisters.end());
+          BlockInfo.ProducedConsumedRegisters.insert(
+            ParentBlockInfo.ProducedConsumedRegisters.begin(),
+            ParentBlockInfo.ProducedConsumedRegisters.end());
+          BlockInfo.ProducedRegisters.insert(
+            ParentBlockInfo.ProducedRegisters.begin(),
+            ParentBlockInfo.ProducedRegisters.end());
+          // Merge the partially consumed registers of the parents and
+          // their dependencies.
+          for (const auto &RConsumers : ParentBlockInfo.RegisterConsumers) {
+            DenseMap<unsigned, SmallPtrSet<SIScheduleBlock*, 16>>::iterator
+              BlockRegisterConsumersI =
+                BlockInfo.RegisterConsumers.find(RConsumers.first);
+            // Check it's filled in RegsConsumers.
+            assert(RegsConsumers.find(RConsumers.first) !=
+                   RegsConsumers.end());
+
+            // Note: the registers in RegisterConsumers are either in
+            // LiveRegsInitId or in BlockInfo.ProducedRegisters.
+            // In all cases they are registers that aren't consumed yet.
+            if (BlockRegisterConsumersI == BlockInfo.RegisterConsumers.end()) {
+              // It's the first time we add the register to the list.
+              // Copy the list of consumers of the register in the parent's
+              // Dependencies list.
+              BlockInfo.RegisterConsumers[RConsumers.first] =
+                RConsumers.second;
+            } else {
+              SmallPtrSet<SIScheduleBlock*, 16> &BlockRegisterConsumers =
+                BlockRegisterConsumersI->second;
+              // It's not the first time. Merge the lists.
+              BlockRegisterConsumers.insert(
+                RConsumers.second.begin(), RConsumers.second.end());
+              // If the register has all its consumers in the Dependencies
+              // list, move it from RegisterConsumers to the correct list.
+              if (BlockRegisterConsumers.size() ==
+                  RegsConsumers[RConsumers.first]) {
+                // The register is consumed, add it to the correct list.
+                // Note: the erase invalidates BlockRegisterConsumersI.
+                BlockInfo.RegisterConsumers.erase(BlockRegisterConsumersI);
+                if (LiveRegsInitId.count(RConsumers.first))
+                  BlockInfo.ConsumedRegisters.insert(RConsumers.first);
+                else {
+                  assert(BlockInfo.ProducedRegisters.find(RConsumers.first) !=
+                         BlockInfo.ProducedRegisters.end());
+                  BlockInfo.ProducedConsumedRegisters.insert(RConsumers.first);
+                  BlockInfo.ProducedRegisters.erase(RConsumers.first);
+                }
+              }
+            }
+          }
+        }
+      }
+      // At this point, we have merged the data from all parents.
+      BlockInfo.Dependencies.insert(Block);
+
+      for (unsigned Reg : Block->getInRegs()) {
+        if (!TargetRegisterInfo::isVirtualRegister(Reg))
+          continue;
+        DEBUG(dbgs() << "InReg: " << PrintVRegOrUnit(Reg, DAG->getTRI())
+                     << '\n');
+        assert(RegToIdentifier.find(Reg) != RegToIdentifier.end());
+        unsigned RegIdentifier = RegToIdentifier[Reg];
+        SmallPtrSet<SIScheduleBlock*, 16> &BlockRegisterConsumers =
+          BlockInfo.RegisterConsumers[RegIdentifier];
+
+        // Check it's filled in RegsConsumers.
+        assert(RegsConsumers.find(RegIdentifier) != RegsConsumers.end());
+
+        BlockInfo.BlockInRegs.insert(RegIdentifier);
+        ++CountUsersPerReg[RegIdentifier];
+
+        BlockRegisterConsumers.insert(Block);
+        // If the register has all its consumers in the Dependencies
+        // list, move it from RegisterConsumers to the correct list.
+        if (BlockRegisterConsumers.size() == RegsConsumers[RegIdentifier]) {
+          // The register is consumed, add it to the correct list.
+          BlockInfo.RegisterConsumers.erase(RegIdentifier);
+          if (LiveRegsInitId.count(RegIdentifier))
+            BlockInfo.ConsumedRegisters.insert(RegIdentifier);
+          else {
+            assert(BlockInfo.ProducedRegisters.find(RegIdentifier) !=
+                   BlockInfo.ProducedRegisters.end());
+            BlockInfo.ProducedConsumedRegisters.insert(RegIdentifier);
+            BlockInfo.ProducedRegisters.erase(RegIdentifier);
+          }
+        }
+      }
+      for (unsigned Reg : Block->getOutRegs()) {
+        if (!TargetRegisterInfo::isVirtualRegister(Reg))
+          continue;
+        // Note: By construction, we can overwrite RegToIdentifier[Reg],
+        // because we iterate in a valid schedule order (any previous value
+        // of Reg was consumed at this point).
+        unsigned RegIdentifier = NextIdentifier;
+        IdentifierToReg[RegIdentifier] = Reg;
+        RegToIdentifier[Reg] = RegIdentifier;
+        DEBUG(dbgs() << "OutReg: " << PrintVRegOrUnit(Reg, DAG->getTRI())
+                     << '\n');
+        ++NextIdentifier;
+
+        RegsConsumers[RegIdentifier] = LiveOutRegsNumUsages[Block->getID()][Reg];
+        BlockInfo.ProducedRegisters.insert(RegIdentifier);
+      }
+
+      for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> Child :
+           Block->getSuccs()) {
+        --BlockNumPredsLeftCurrent[Child.first->getID()];
+        if (BlockNumPredsLeftCurrent[Child.first->getID()] == 0) {
+          SchedulableBlockNextDepth.push_back(Child.first);
+        }
+      }
+    }
+    --SearchDepthLimit;
+    SchedulableBlocks = SchedulableBlockNextDepth;
+    SchedulableBlockNextDepth.clear();
+  }
+
+  // We now have, for each block, the registers produced and consumed by
+  // the block and its dependencies.
+  // We could at this point take the block whose consumed - produced diff
+  // has the best score, but working on consumable registers (instead of
+  // blocks) gives better results: a register's path also covers the
+  // combination of blocks (and their dependencies) needed to release that
+  // specific register.
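+  // For example, if a register %x is read by two independent blocks B2 and
+  // B7 (hypothetical names), no single block entry covers the release of
+  // %x: the per-register entry for %x merges the dependencies of both B2
+  // and B7 and scores that combination as a whole.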
+
+  // To speed up computation, we compute RegisterInfos only for registers
+  // that can be consumed.
+  for (const auto &RegUserCount : CountUsersPerReg) {
+    assert(RegUserCount.second <= RegsConsumers[RegUserCount.first]);
+    if (RegUserCount.second == RegsConsumers[RegUserCount.first])
+      ConsumableRegisters.insert(RegUserCount.first);
+  }
+
+  // Fill RegisterInfos.
+  for (auto &BlockInfoPair : BlockInfos) {
+    SIBlockInfo &BlockInfo = BlockInfoPair.second;
+    for (unsigned Reg : BlockInfo.BlockInRegs) {
+      DenseMap<unsigned, SIMIRegisterInfo>::iterator RegisterInfosI;
+      if (!ConsumableRegisters.count(Reg))
+        continue;
+      RegisterInfosI = RegisterInfos.find(Reg);
+      if (RegisterInfosI == RegisterInfos.end()) {
+        SIMIRegisterInfo &RInfo = RegisterInfos[Reg];
+        RInfo.Dependencies = BlockInfo.Dependencies;
+        RInfo.ConsumedRegisters = BlockInfo.ConsumedRegisters;
+        RInfo.ProducedConsumedRegisters =
+          BlockInfo.ProducedConsumedRegisters;
+        RInfo.ProducedRegisters = BlockInfo.ProducedRegisters;
+        RInfo.RegisterConsumers = BlockInfo.RegisterConsumers;
+      } else {
+        SIMIRegisterInfo &RInfo = RegisterInfosI->second;
+        RInfo.Dependencies.insert(
+          BlockInfo.Dependencies.begin(),
+          BlockInfo.Dependencies.end());
+        RInfo.ConsumedRegisters.insert(
+          BlockInfo.ConsumedRegisters.begin(),
+          BlockInfo.ConsumedRegisters.end());
+        RInfo.ProducedConsumedRegisters.insert(
+          BlockInfo.ProducedConsumedRegisters.begin(),
+          BlockInfo.ProducedConsumedRegisters.end());
+        // Contrary to Blocks, RInfo's ProducedRegisters can contain
+        // registers that are in BlockInfo's *ConsumedRegisters and
+        // vice-versa -> do the union of everything, then fix it up below.
+        RInfo.ProducedRegisters.insert(
+          BlockInfo.ProducedRegisters.begin(),
+          BlockInfo.ProducedRegisters.end());
+        for (const auto &RConsumers : BlockInfo.RegisterConsumers) {
+          DenseMap<unsigned, SmallPtrSet<SIScheduleBlock*, 16>>::iterator
+            RegisterConsumerI =
+              RInfo.RegisterConsumers.find(RConsumers.first);
+          if (RegisterConsumerI == RInfo.RegisterConsumers.end()) {
+            RInfo.RegisterConsumers[RConsumers.first] = RConsumers.second;
+          } else {
+            RegisterConsumerI->second.insert(RConsumers.second.begin(),
+                                             RConsumers.second.end());
+          }
+        }
+      }
+    }
+  }
+
+  // We have done the union of all the fields, we must now "fix" them by
+  // correctly detecting consumed registers.
+  for (DenseMap<unsigned, SIMIRegisterInfo>::iterator I =
+         RegisterInfos.begin(); I != RegisterInfos.end(); ++I) {
+    SIMIRegisterInfo &RInfo = I->second;
+    // Anything in the lists of consumed registers needs
+    // to be removed from the lists of unconsumed registers.
+    for (unsigned Reg : RInfo.ConsumedRegisters) {
+      RInfo.ProducedRegisters.erase(Reg);
+      RInfo.RegisterConsumers.erase(Reg);
+    }
+    for (unsigned Reg : RInfo.ProducedConsumedRegisters) {
+      RInfo.ProducedRegisters.erase(Reg);
+      RInfo.RegisterConsumers.erase(Reg);
+    }
+    // Detect registers that are in RegisterConsumers and released.
+    Temp.clear(); // Erasing values invalidates the iterator.
+    for (DenseMap<unsigned, SmallPtrSet<SIScheduleBlock*, 16>>::iterator I2 =
+           RInfo.RegisterConsumers.begin();
+         I2 != RInfo.RegisterConsumers.end(); ++I2) {
+      // A register is consumed if all its consumers are listed.
+      unsigned Reg = I2->first;
+      bool ToErase = I2->second.size() == RegsConsumers[Reg];
+
+      if (ToErase) {
+        Temp.push_back(I2->first);
+        if (LiveRegsInitId.count(Reg))
+          RInfo.ConsumedRegisters.insert(Reg);
+        else {
+          assert(RInfo.ProducedRegisters.find(Reg) !=
+                 RInfo.ProducedRegisters.end());
+          RInfo.ProducedConsumedRegisters.insert(Reg);
+          RInfo.ProducedRegisters.erase(Reg);
+        }
+      }
+    }
+    for (unsigned Reg : Temp) {
+      RInfo.RegisterConsumers.erase(Reg);
+    }
+  }
+
+  DEBUG(
+    for (const auto &RInfo : RegisterInfos) {
+      unsigned Reg = RInfo.first;
+      dbgs() << Reg << "(" << PrintVRegOrUnit(IdentifierToReg[Reg],
+                                              DAG->getTRI())
+             << ") :\nConsumed: ";
+      for (unsigned Reg2 : RInfo.second.ConsumedRegisters)
+        dbgs() << Reg2 << " ";
+      dbgs() << "\nProducedConsumed: ";
+      for (unsigned Reg2 : RInfo.second.ProducedConsumedRegisters)
+        dbgs() << Reg2 << " ";
+      dbgs() << "\nProduced: ";
+      for (unsigned Reg2 : RInfo.second.ProducedRegisters)
+        dbgs() << Reg2 << " ";
+      dbgs() << "\nRegisterConsumers: ";
+      for (const auto &RConsumers : RInfo.second.RegisterConsumers)
+        dbgs() << " " << RConsumers.first << "(" << RConsumers.second.size()
+               << ", " << RegsConsumers[RConsumers.first] << ")";
+      dbgs() << "\n\n";
+    });
+
+  // Find the best score.
+  for (const auto &RInfo : RegisterInfos) {
+    int DiffVGPR = 0;
+    int DiffSGPR = 0;
+
+    for (unsigned Reg : RInfo.second.ConsumedRegisters) {
+      unsigned RealReg = IdentifierToReg[Reg];
+      PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg);
+      for (; PSetI.isValid(); ++PSetI) {
+        if (*PSetI == DAG->getVGPRSetID())
+          DiffVGPR -= PSetI.getWeight();
+        if (*PSetI == DAG->getSGPRSetID())
+          DiffSGPR -= PSetI.getWeight();
+      }
+    }
+    for (unsigned Reg : RInfo.second.ProducedRegisters) {
+      unsigned RealReg = IdentifierToReg[Reg];
+      PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg);
+      for (; PSetI.isValid(); ++PSetI) {
+        if (*PSetI == DAG->getVGPRSetID())
+          DiffVGPR += PSetI.getWeight();
+        if (*PSetI == DAG->getSGPRSetID())
+          DiffSGPR += PSetI.getWeight();
+      }
+    }
+    DEBUG(dbgs() << RInfo.first << ": diff = (" << DiffVGPR << ", "
+                 << DiffSGPR << ")\n");
+    // Remove cases that don't match the target.
+    if (DiffVGPR > VGPRDiffGoal || DiffSGPR > SGPRDiffGoal)
+      continue;
+    // Give priority to reducing either VGPR or SGPR pressure.
+    if (PriorityVGPR) {
+      if (BestDiffVGPR > DiffVGPR) {
+        BestDiffVGPR = DiffVGPR;
+        BestDiffSGPR = DiffSGPR;
+      } else if (BestDiffVGPR == DiffVGPR) {
+        if (BestDiffSGPR > DiffSGPR)
+          BestDiffSGPR = DiffSGPR;
+      }
+    } else {
+      if (BestDiffSGPR > DiffSGPR) {
+        BestDiffVGPR = DiffVGPR;
+        BestDiffSGPR = DiffSGPR;
+      } else if (BestDiffSGPR == DiffSGPR) {
+        if (BestDiffVGPR > DiffVGPR)
+          BestDiffVGPR = DiffVGPR;
+      }
+    }
+  }
+
+  // No path found.
+  if (BestDiffVGPR == INT_MAX) {
+    DEBUG(dbgs() << "No good path found\n");
+    return std::set<SIScheduleBlock*>();
+  }
+
+  assert(RegisterInfos.size() != 0);
+
+  DEBUG(dbgs() << "Best diff score: (" << BestDiffVGPR << ", " << BestDiffSGPR
+               << ")\n");
+
+  // Return the dependencies of the first register whose path matches the
+  // best score.
+  for (auto &RInfo : RegisterInfos) {
+    int DiffVGPR = 0;
+    int DiffSGPR = 0;
+    for (unsigned Reg : RInfo.second.ConsumedRegisters) {
+      unsigned RealReg = IdentifierToReg[Reg];
+      PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg);
+      for (; PSetI.isValid(); ++PSetI) {
+        if (*PSetI == DAG->getVGPRSetID())
+          DiffVGPR -= PSetI.getWeight();
+        if (*PSetI == DAG->getSGPRSetID())
+          DiffSGPR -= PSetI.getWeight();
+      }
+    }
+    for (unsigned Reg : RInfo.second.ProducedRegisters) {
+      unsigned RealReg = IdentifierToReg[Reg];
+      PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg);
+      for (; PSetI.isValid(); ++PSetI) {
+        if (*PSetI == DAG->getVGPRSetID())
+          DiffVGPR += PSetI.getWeight();
+        if (*PSetI == DAG->getSGPRSetID())
+          DiffSGPR += PSetI.getWeight();
+      }
+    }
+    if (BestDiffVGPR == DiffVGPR && BestDiffSGPR == DiffSGPR) {
+      DEBUG(RInfo.second.printDebug(DAG, IdentifierToReg));
+      return std::set<SIScheduleBlock*>(RInfo.second.Dependencies.begin(),
+                                        RInfo.second.Dependencies.end());
+    }
+  }
+
+  llvm_unreachable("internal error");
+}
+
 // SIScheduler //

 struct SIScheduleBlockResult
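
A note on the scoring: the per-register (VGPR, SGPR) weight accumulation appears several times in the patch (printDebug, the best-score loop, the final lookup). The sketch below only illustrates that computation and is not part of the patch: the helper name and its factoring are assumptions, while everything it calls (getMRI, getPressureSets, getVGPRSetID, getSGPRSetID, PSetIterator) is exactly what the patch already uses, and it assumes the headers already included by SIMachineScheduler.cpp plus <utility>.

  // Illustrative only: the (VGPR, SGPR) pressure diff of one candidate path.
  // Consumed registers release their pressure-set weight; produced but
  // still-live registers add theirs.
  static std::pair<int, int>
  computePathPressureDiff(SIScheduleDAGMI *DAG,
                          const DenseSet<unsigned> &Consumed,
                          const DenseSet<unsigned> &Produced,
                          DenseMap<unsigned, unsigned> &IdentifierToReg) {
    int DiffVGPR = 0;
    int DiffSGPR = 0;
    auto AddWeights = [&](unsigned Identifier, int Sign) {
      unsigned Reg = IdentifierToReg[Identifier];
      PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
      for (; PSetI.isValid(); ++PSetI) {
        if (*PSetI == DAG->getVGPRSetID())
          DiffVGPR += Sign * (int)PSetI.getWeight();
        if (*PSetI == DAG->getSGPRSetID())
          DiffSGPR += Sign * (int)PSetI.getWeight();
      }
    };
    for (unsigned Id : Consumed)
      AddWeights(Id, -1);
    for (unsigned Id : Produced)
      AddWeights(Id, +1);
    return std::make_pair(DiffVGPR, DiffSGPR);
  }

Negative components mean the candidate path would release pressure in that register file; findPathRegUsage compares such a pair against (VGPRDiffGoal, SGPRDiffGoal) and the running best, with the priority given by PriorityVGPR.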