Index: lib/Target/AMDGPU/SIMachineScheduler.h =================================================================== --- lib/Target/AMDGPU/SIMachineScheduler.h +++ lib/Target/AMDGPU/SIMachineScheduler.h @@ -346,6 +346,7 @@ std::vector BlocksScheduled; unsigned NumBlockScheduled; std::vector ReadyBlocks; + std::set CurrentPathRegUsage; unsigned VregCurrentUsage; unsigned SregCurrentUsage; @@ -429,6 +430,10 @@ void checkRegUsageImpact(unsigned BlockID, int &DiffVGPR, int &DiffSGPR); void schedule(); + std::set findPathRegUsage(int SearchDepthLimit, + int VGPRDiffGoal, + int SGPRDiffGoal, + bool PriorityVGPR); }; struct SIScheduleBlockResult { Index: lib/Target/AMDGPU/SIMachineScheduler.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineScheduler.cpp +++ lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -17,6 +17,9 @@ #include "SIMachineScheduler.h" #include "SIRegisterInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -1756,6 +1759,33 @@ dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n'; ); + // 200 and 70 are arbitrary thresholds. + // TODO: better way to set them ? + // If finding a path of depth 6 fails, try to find a path + // of depth 12. + if (CurrentPathRegUsage.empty()) { + if (VregCurrentUsage > 200) { + CurrentPathRegUsage = findPathRegUsage(6, 0, INT_MAX, true); + if (CurrentPathRegUsage.empty()) + CurrentPathRegUsage = findPathRegUsage(12, 0, INT_MAX, true); + } else if (SregCurrentUsage > 70) { + CurrentPathRegUsage = findPathRegUsage(6, INT_MAX, 0, false); + if (CurrentPathRegUsage.empty()) + CurrentPathRegUsage = findPathRegUsage(12, INT_MAX, 0, false); + } + } + + DEBUG( + if (!CurrentPathRegUsage.empty()) { + dbgs() << "Restricting research among: "; + for (SIScheduleBlock* Block : ReadyBlocks) { + if (!CurrentPathRegUsage.count(Block)) + continue; + dbgs() << Block->getID() << ' '; + } + dbgs() << '\n'; + } + ); Cand.Block = nullptr; for (std::vector::iterator I = ReadyBlocks.begin(), E = ReadyBlocks.end(); I != E; ++I) { @@ -1763,6 +1793,10 @@ unsigned TryCandID; int SGPRUsageDiff; + if (!CurrentPathRegUsage.empty() && + CurrentPathRegUsage.find(*I) == CurrentPathRegUsage.end()) + continue; + TryCand.Block = *I; TryCandID = TryCand.Block->getID(); TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock(); @@ -1805,6 +1839,8 @@ Block = Cand.Block; ReadyBlocks.erase(Best); + if (!CurrentPathRegUsage.empty()) + CurrentPathRegUsage.erase(Block); return Block; } @@ -1929,6 +1965,578 @@ } } +// Strategy to reduce register pressure: +// Idea: Reducing register pressure is hard. Heuristics with +// scores have failed (scores for each instruction about a "potential" +// to release registers). +// Instead of using heuristics, try to find a path of instructions with a +// reduced number of instructions, such that at the end of the path the +// number of live registers is reduced. +// +// Algorithm: +// For each alive register or register not produced: +// -> if we need more than k instructions to consume the register, ignore. +// -> else compute the minimal set of registers that would be produced to +// schedule all the instructions needed to consume the register, idem +// for the set of registers that would be consumed (separate currently +// alive registers and intermediate registers). +// +// We then look at the register such that the path to consume it released the +// most registers. +// +// A way to get better results would then to try doing combinations that +// would directly give better consumed/produced ratio. +// +// Another way to improve would be to pick amond all the possible choices +// the best latency-friendly path. + +struct SIBlockInfo { + // All Blocks that have to be scheduled first + Block + SmallPtrSet Dependencies; + // The registers that were initially defined, that + // scheduling both this block and its dependencies + // consumed. + DenseSet ConsumedRegisters; + // Idem for the intermediate registers (that were produced + // at some point, and then consumed). + DenseSet ProducedConsumedRegisters; + // idem for registers that were produced at some point, but + // are still alive. + DenseSet ProducedRegisters; + // Registers that were specifically eaten by this block (identical to + // to LiveInRegs, but in the "unique reg identifier" space). + DenseSet BlockInRegs; + // unique identifier -> which blocks in the Dependencies do eat this register + // We use this only to count how many times a give register, but we use + // set to enable union when merging several branches. + // Registers that are consumed are removed from the map. + DenseMap> RegisterConsumers; + SIBlockInfo() : Dependencies(), ConsumedRegisters(), + ProducedConsumedRegisters(), ProducedRegisters(), BlockInRegs(), + RegisterConsumers() {} +}; + +struct SIMIRegisterInfo { + // The minimal set of Blocks to schedule to release this register. + SmallPtrSet Dependencies; + // The livein registers that are consumed by scheduling the Dependencies. + DenseSet ConsumedRegisters; + // The registers that are produced then consumed by scheduling the + // Dependencies. + DenseSet ProducedConsumedRegisters; + // The registers that are produced then consumed by scheduling the + // Dependencies. + DenseSet ProducedRegisters; + // Idem than for Blocks + DenseMap> RegisterConsumers; + SIMIRegisterInfo() : Dependencies(), ConsumedRegisters(), + ProducedConsumedRegisters(), ProducedRegisters(), RegisterConsumers() {} +#ifndef NDEBUG + void + printDebug(SIScheduleDAGMI *DAG, + DenseMap &IdentifierToReg) + { + dbgs() << "Block list: "; + for (SIScheduleBlock *Block : Dependencies) + dbgs() << Block->getID() << " "; + dbgs() << '\n'; + dbgs() << "Consumed registers: "; + for (unsigned Reg : ConsumedRegisters) { + RegisterMaskPair &RealReg = IdentifierToReg.find(Reg)->second; + int DiffV = 0; + int DiffS = 0; + PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg.RegUnit); + for (; PSetI.isValid(); ++PSetI) { + if (*PSetI == DAG->getVGPRSetID()) + DiffV -= PSetI.getWeight(); + if (*PSetI == DAG->getSGPRSetID()) + DiffS -= PSetI.getWeight(); + } + dbgs() << PrintVRegOrUnit(RealReg.RegUnit, DAG->getTRI()); + if (!RealReg.LaneMask.all()) + dbgs() << ':' << PrintLaneMask(RealReg.LaneMask); + dbgs() << "(" << DiffV << ", " << DiffS << "), "; + } + dbgs() << '\n'; + dbgs() << "Intermediate registers: "; + for (unsigned Reg : ProducedConsumedRegisters) { + RegisterMaskPair &RealReg = IdentifierToReg.find(Reg)->second; + int DiffV = 0; + int DiffS = 0; + PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg.RegUnit); + for (; PSetI.isValid(); ++PSetI) { + if (*PSetI == DAG->getVGPRSetID()) + DiffV += PSetI.getWeight(); + if (*PSetI == DAG->getSGPRSetID()) + DiffS += PSetI.getWeight(); + } + dbgs() << PrintVRegOrUnit(RealReg.RegUnit, DAG->getTRI()); + if (!RealReg.LaneMask.all()) + dbgs() << ':' << PrintLaneMask(RealReg.LaneMask); + dbgs() << "(" << DiffV << ", " << DiffS << "), "; + } + dbgs() << '\n'; + dbgs() << "Produced registers: "; + for (unsigned Reg : ProducedRegisters) { + RegisterMaskPair &RealReg = IdentifierToReg.find(Reg)->second; + int DiffV = 0; + int DiffS = 0; + PSetIterator PSetI = DAG->getMRI()->getPressureSets(RealReg.RegUnit); + for (; PSetI.isValid(); ++PSetI) { + if (*PSetI == DAG->getVGPRSetID()) + DiffV += PSetI.getWeight(); + if (*PSetI == DAG->getSGPRSetID()) + DiffS += PSetI.getWeight(); + } + dbgs() << PrintVRegOrUnit(RealReg.RegUnit, DAG->getTRI()); + if (!RealReg.LaneMask.all()) + dbgs() << ':' << PrintLaneMask(RealReg.LaneMask); + dbgs() << "(" << DiffV << ", " << DiffS << "), "; + } + dbgs() << '\n'; + } +#endif +}; + +std::set +SIScheduleBlockScheduler::findPathRegUsage(int SearchDepthLimit, + int VGPRDiffGoal, + int SGPRDiffGoal, + bool PriorityVGPR) +{ + DenseSet LiveRegsInitId; + DenseMap RegsConsumers; + + DenseMap CountUsersPerReg; + DenseSet ConsumableRegisters; + + std::vector BlockNumPredsLeftCurrent = BlockNumPredsLeft; + + // We want an unique identifier per register, but a register can be reused + // (input and output of a block). We thus use a mapping + // "unique reg identifier" -> register, + // and an opposite mapping register -> current associated identifier. + // Similar to previously, we will say a given RegisterMaskPair is an + // uniquer register (by construction, the Masks for a same register + // don't intersect). + DenseMap IdentifierToReg; + std::map RegToIdentifier; + unsigned NextIdentifier = 0; + + DenseMap BlockInfos; + std::vector SchedulableBlocks = ReadyBlocks; + std::vector SchedulableBlockNextDepth; + + DenseMap RegisterInfos; + + SmallVector Temp; + + int BestDiffVGPR = INT_MAX; + int BestDiffSGPR = INT_MAX; + unsigned BestDiffReg = ~0u; + + // Used to compute scores + SmallDenseMap Map; + SmallPtrSet Set; + unsigned VGPRSetID = DAG->getVGPRSetID(); + unsigned SGPRSetID = DAG->getSGPRSetID(); + + if (ReadyBlocks.empty()) + return std::set(); + + DEBUG(dbgs() << "findPathRegUsage(" << SearchDepthLimit << ")\n"); + DEBUG(dbgs() << "Initial Live regs:\n"); + + // Fill info for initial registers + for (const auto &RegP : LiveRegs) { + unsigned Reg = RegP.first; + // Ignoring physical registers + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (const auto RegPair : getPairsForReg(Reg, RegP.second)) { + (void) LiveRegsInitId.insert(NextIdentifier); + assert(LiveRegsConsumers.find(RegPair) != LiveRegsConsumers.end()); + assert(LiveRegsConsumers[RegPair] > 0); + RegsConsumers[NextIdentifier] = LiveRegsConsumers[RegPair]; + + IdentifierToReg.insert(std::make_pair(NextIdentifier, RegPair)); + RegToIdentifier[RegPair] = NextIdentifier; + DEBUG(dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()); + if (!RegPair.LaneMask.all()) + dbgs() << ':' << PrintLaneMask(RegPair.LaneMask); + dbgs() << " (--> " << NextIdentifier << ")\n";); + ++NextIdentifier; + } + } + + // Fill BlockInfos + while (SearchDepthLimit > 0 && !SchedulableBlocks.empty()) { + DEBUG(dbgs() << "Iterating... Remaining levels: " << SearchDepthLimit << '\n'); + for (SIScheduleBlock* Block : SchedulableBlocks) { + unsigned ID = Block->getID(); + SIBlockInfo &BlockInfo = BlockInfos[ID]; + + DEBUG(dbgs() << "Computing data for Block: " << ID << '\n'); + + for (SIScheduleBlock *Parent : Block->getPreds()) { + DenseMap::iterator ParentBlockInfoPair = + BlockInfos.find(Parent->getID()); + // We fill the data in a topological sorted order, thus + // all parents are seen before the Block. However we don't + // fill any data if the Parent is already scheduled. + if (ParentBlockInfoPair != BlockInfos.end()) { + SIBlockInfo &ParentBlockInfo = ParentBlockInfoPair->second; + // Add the dependencies of the parents + BlockInfo.Dependencies.insert( + ParentBlockInfo.Dependencies.begin(), + ParentBlockInfo.Dependencies.end()); + // Idem for consumed and produced registers + BlockInfo.ConsumedRegisters.insert( + ParentBlockInfo.ConsumedRegisters.begin(), + ParentBlockInfo.ConsumedRegisters.end()); + BlockInfo.ProducedConsumedRegisters.insert( + ParentBlockInfo.ProducedConsumedRegisters.begin(), + ParentBlockInfo.ProducedConsumedRegisters.end()); + BlockInfo.ProducedRegisters.insert( + ParentBlockInfo.ProducedRegisters.begin(), + ParentBlockInfo.ProducedRegisters.end()); + // Merge the partially consumed registers of the parents and + // their dependencies. + for (const auto &RConsumers: ParentBlockInfo.RegisterConsumers) { + DenseMap>::iterator + BlockRegisterConsumersI = BlockInfo.RegisterConsumers.find(RConsumers.first); + // Check it's filled in RegsConsumers + assert(RegsConsumers.find(RConsumers.first) != RegsConsumers.end()); + + // Note: the registers in RegisterConsumers are either in + // LiveRegsCurrent or in BlockInfo.ProducedRegisters. + // In all cases it's registers that aren't consumed yet. + if (BlockRegisterConsumersI == BlockInfo.RegisterConsumers.end()) { + // It's the first time we add the register to the list. + // Copy the list of consumers of the register in the parent's + // Dependencies list. + BlockInfo.RegisterConsumers[RConsumers.first] = + RConsumers.second; + } + else { + SmallPtrSet &BlockRegisterConsumers = + BlockRegisterConsumersI->second; + // It's not the first time. Merge the lists. + BlockRegisterConsumers.insert( + RConsumers.second.begin(), RConsumers.second.end()); + // If the register has all its consumers in the Dependencies + // list, move it from RegisterConsumers to the correct list. + if (BlockRegisterConsumers.size() == + RegsConsumers[RConsumers.first]) { + // Register is consumed, add to the correct list + // Note: the erase invalidates BlockRegisterConsumersI. + BlockInfo.RegisterConsumers.erase(BlockRegisterConsumersI); + if (LiveRegsInitId.count(RConsumers.first)) + BlockInfo.ConsumedRegisters.insert(RConsumers.first); + else { + assert(BlockInfo.ProducedRegisters.find(RConsumers.first) != + BlockInfo.ProducedRegisters.end()); + BlockInfo.ProducedConsumedRegisters.insert(RConsumers.first); + BlockInfo.ProducedRegisters.erase(RConsumers.first); + } + } + } + } + } + } + // At this point, we have merged the data from all parents. + BlockInfo.Dependencies.insert(Block); + + for (const auto &RegPair : InRegsForBlock[ID]) { + unsigned Reg = RegPair.RegUnit; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + DEBUG(dbgs() << "InReg : " << PrintVRegOrUnit(Reg, DAG->getTRI()); + if (!RegPair.LaneMask.all()) + dbgs() << ':' << PrintLaneMask(RegPair.LaneMask); + dbgs () << '\n'); + assert(RegToIdentifier.find(RegPair) != RegToIdentifier.end()); + unsigned RegIdentifier = RegToIdentifier[RegPair]; + SmallPtrSet &BlockRegisterConsumers = + BlockInfo.RegisterConsumers[RegIdentifier]; + + // Check it's filled in RegsConsumers + assert(RegsConsumers.find(RegIdentifier) != RegsConsumers.end()); + + BlockInfo.BlockInRegs.insert(RegIdentifier); + ++CountUsersPerReg[RegIdentifier]; + + BlockRegisterConsumers.insert(Block); + // If the register has all its consumers in the Dependencies + // list, move it from RegisterConsumers to the correct list. + if (BlockRegisterConsumers.size() == RegsConsumers[RegIdentifier]) { + // Register is consumed, add to the correct list + BlockInfo.RegisterConsumers.erase(RegIdentifier); + if (LiveRegsInitId.count(RegIdentifier)) + BlockInfo.ConsumedRegisters.insert(RegIdentifier); + else { + assert(BlockInfo.ProducedRegisters.find(RegIdentifier) != + BlockInfo.ProducedRegisters.end()); + BlockInfo.ProducedConsumedRegisters.insert(RegIdentifier); + BlockInfo.ProducedRegisters.erase(RegIdentifier); + } + } + } + for (const auto &RegPair : OutRegsForBlock[ID]) { + unsigned Reg = RegPair.RegUnit; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Note: By construction, we can overwrite RegToIdentifier[Reg], + // because we schedule in a valid order (Reg was consumed at this + // point). + unsigned RegIdentifier = NextIdentifier; + IdentifierToReg.insert(std::make_pair(RegIdentifier, RegPair)); + RegToIdentifier[RegPair] = RegIdentifier; + DEBUG(dbgs() << "OutReg : " << PrintVRegOrUnit(Reg, DAG->getTRI()); + if (!RegPair.LaneMask.all()) + dbgs() << ':' << PrintLaneMask(RegPair.LaneMask); + dbgs () << '\n'); + ++NextIdentifier; + + RegsConsumers[RegIdentifier] = LiveOutRegsNumUsages[ID][RegPair]; + BlockInfo.ProducedRegisters.insert(RegIdentifier); + } + + for (std::pair Child : + Block->getSuccs()) { + --BlockNumPredsLeftCurrent[Child.first->getID()]; + if (BlockNumPredsLeftCurrent[Child.first->getID()] == 0) { + SchedulableBlockNextDepth.push_back(Child.first); + } + } + } + --SearchDepthLimit; + SchedulableBlocks = SchedulableBlockNextDepth; + SchedulableBlockNextDepth.clear(); + } + + // We have now the info for each block about produced and + // consumed registers for the block and its dependencies. + // We could at this point take the block such that + // consumed - produced has the best score, but working on + // consumable registers (instead of block) gives better result. + // Reason is working on registers contains also some combination of + // blocks and their dependencies (to release a specific registers). + + // To speed up computation, we compute RegisterInfo only for registers + // that can be consumed. + for (const auto &RegUserCount : CountUsersPerReg) { + assert (RegUserCount.second <= RegsConsumers[RegUserCount.first]); + if (RegUserCount.second == RegsConsumers[RegUserCount.first]) + ConsumableRegisters.insert(RegUserCount.first); + } + + // Fill RegisterInfos. + for (auto BlockInfoPair : BlockInfos) { + SIBlockInfo &BlockInfo = BlockInfoPair.second; + for (unsigned Reg : BlockInfo.BlockInRegs) { + DenseMap::iterator RegisterInfosI; + if (!ConsumableRegisters.count(Reg)) + continue; + RegisterInfosI = RegisterInfos.find(Reg); + if (RegisterInfosI == RegisterInfos.end()) { + SIMIRegisterInfo &RInfo = RegisterInfos[Reg]; + RInfo.Dependencies = BlockInfo.Dependencies; + RInfo.ConsumedRegisters = BlockInfo.ConsumedRegisters; + RInfo.ProducedConsumedRegisters = + BlockInfo.ProducedConsumedRegisters; + RInfo.ProducedRegisters = BlockInfo.ProducedRegisters; + RInfo.RegisterConsumers = BlockInfo.RegisterConsumers; + } + else { + SIMIRegisterInfo &RInfo = RegisterInfosI->second; + RInfo.Dependencies.insert( + BlockInfo.Dependencies.begin(), + BlockInfo.Dependencies.end()); + RInfo.ConsumedRegisters.insert( + BlockInfo.ConsumedRegisters.begin(), + BlockInfo.ConsumedRegisters.end()); + RInfo.ProducedConsumedRegisters.insert( + BlockInfo.ProducedConsumedRegisters.begin(), + BlockInfo.ProducedConsumedRegisters.end()); + // Contrary to Blocks, we can have things in RInfo ProducedRegisters + // That are in BlockInfo *ConsumedRegisters and vice-versa + // -> Do the Union of everything, then fix. + RInfo.ProducedRegisters.insert( + BlockInfo.ProducedRegisters.begin(), + BlockInfo.ProducedRegisters.end()); + for (const auto &RConsumers: BlockInfo.RegisterConsumers) { + DenseMap>::iterator + RegisterConsumerI = RInfo.RegisterConsumers.find(RConsumers.first); + if (RegisterConsumerI == RInfo.RegisterConsumers.end()) { + RInfo.RegisterConsumers[RConsumers.first] = RConsumers.second; + } + else { + RegisterConsumerI->second.insert(RConsumers.second.begin(), + RConsumers.second.end()); + } + } + } + } + } + + // We have done the union of all fields, we must "fix" them by correctly + // detecting consumed registers. + for (DenseMap::iterator I = + RegisterInfos.begin(); I != RegisterInfos.end(); ++I) { + SIMIRegisterInfo &RInfo = I->second; + // Anything in the list of consumed registers need + // to be removed from the list of unconsumed registers. + for (unsigned Reg : RInfo.ConsumedRegisters) { + RInfo.ProducedRegisters.erase(Reg); + RInfo.RegisterConsumers.erase(Reg); + } + for (unsigned Reg : RInfo.ProducedConsumedRegisters) { + RInfo.ProducedRegisters.erase(Reg); + RInfo.RegisterConsumers.erase(Reg); + } + // Detect registers that are in RegisterConsumers and released. + Temp.clear(); // Erasing values invalidate iterator. + for (DenseMap>::iterator I2 = + RInfo.RegisterConsumers.begin(); + I2 != RInfo.RegisterConsumers.end(); ++I2) { + // A register is consumed if all its consumers are listed. + unsigned Reg = I2->first; + bool ToErase = I2->second.size() == RegsConsumers[Reg]; + + if (ToErase) { + Temp.push_back(I2->first); + if (LiveRegsInitId.count(Reg)) + RInfo.ConsumedRegisters.insert(Reg); + else { + assert(RInfo.ProducedRegisters.find(Reg) != + RInfo.ProducedRegisters.end()); + RInfo.ProducedConsumedRegisters.insert(Reg); + RInfo.ProducedRegisters.erase(Reg); + } + } + } + for (unsigned Reg : Temp) { + RInfo.RegisterConsumers.erase(Reg); + } + } + + DEBUG( + for (const auto &RInfo : RegisterInfos) { + unsigned Reg = RInfo.first; + RegisterMaskPair &RealReg = IdentifierToReg.find(Reg)->second; + dbgs() << Reg << "(" << PrintVRegOrUnit(RealReg.RegUnit, DAG->getTRI()); + if (!RealReg.LaneMask.all()) + dbgs() << ':' << PrintLaneMask(RealReg.LaneMask); + dbgs() << ")" << " :\nConsumed: "; + for (unsigned Reg2 : RInfo.second.ConsumedRegisters) + dbgs() << Reg2 << " "; + dbgs() << "\nProducedConsumed: "; + for (unsigned Reg2 : RInfo.second.ProducedConsumedRegisters) + dbgs() << Reg2 << " "; + dbgs() << "\nProduced: "; + for (unsigned Reg2 : RInfo.second.ProducedRegisters) + dbgs() << Reg2 << " "; + dbgs() << "\nRegisterConsumers: "; + for (const auto &RConsumers: RInfo.second.RegisterConsumers) + dbgs() << " " << RConsumers.first << "(" << RConsumers.second.size() << ", " << RegsConsumers[RConsumers.first] << ")"; + dbgs() << "\n\n"; + }); + + // Find the best score + for (const auto &RInfo : RegisterInfos) { + int DiffVGPR = 0; + int DiffSGPR = 0; + + Map.clear(); + Set.clear(); + + for (unsigned Reg : RInfo.second.ConsumedRegisters) { + RegisterMaskPair &RealReg = IdentifierToReg.find(Reg)->second; + Map[RealReg.RegUnit] |= RealReg.LaneMask; + } + + for (const auto &RegPair : Map) { + if (LiveRegs[RegPair.first] == RegPair.second) { + PSetIterator PSetI = DAG->getMRI()->getPressureSets(RegPair.first); + for (; PSetI.isValid(); ++PSetI) { + if (*PSetI == VGPRSetID) + DiffVGPR -= PSetI.getWeight(); + if (*PSetI == SGPRSetID) + DiffSGPR -= PSetI.getWeight(); + } + } + } + + for (unsigned Reg : RInfo.second.ProducedRegisters) { + RegisterMaskPair &RealReg = IdentifierToReg.find(Reg)->second; + Set.insert(RealReg.RegUnit); + } + + for (unsigned Reg : Set) { + // Check register is not already alive (at least some lanes) + if (LiveRegs.find(Reg) == LiveRegs.end()) { + PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); + for (; PSetI.isValid(); ++PSetI) { + if (*PSetI == VGPRSetID) + DiffVGPR += PSetI.getWeight(); + if (*PSetI == SGPRSetID) + DiffSGPR += PSetI.getWeight(); + } + } + } + + DEBUG(dbgs() << RInfo.first << ": diff = (" << DiffVGPR << ", " + << DiffSGPR << ")\n"); + // Remove cases that don't match the target. + if (DiffVGPR > VGPRDiffGoal || DiffSGPR > SGPRDiffGoal) + continue; + // Priority to reduce VGPR or SGPR + if (PriorityVGPR) { + if (BestDiffVGPR > DiffVGPR) { + BestDiffVGPR = DiffVGPR; + BestDiffSGPR = DiffSGPR; + BestDiffReg = RInfo.first; + } + else if (BestDiffVGPR == DiffVGPR) { + if (BestDiffSGPR > DiffSGPR) { + BestDiffSGPR = DiffSGPR; + BestDiffReg = RInfo.first; + } + } + } + else { + if (BestDiffSGPR > DiffSGPR) { + BestDiffVGPR = DiffVGPR; + BestDiffSGPR = DiffSGPR; + BestDiffReg = RInfo.first; + } + else if (BestDiffSGPR == DiffSGPR) { + if (BestDiffVGPR > DiffVGPR) { + BestDiffVGPR = DiffVGPR; + BestDiffReg = RInfo.first; + } + } + } + } + + // No path found + if (BestDiffVGPR == INT_MAX) { + DEBUG(dbgs() << "No good path found\n"); + return std::set(); + } + + assert (RegisterInfos.size() != 0); + + DEBUG(dbgs() << "Best diff score: (" << BestDiffVGPR << ", " << BestDiffSGPR + << ")\n"); + + SIMIRegisterInfo &RInfo = RegisterInfos[BestDiffReg]; + DEBUG(RInfo.printDebug(DAG, IdentifierToReg)); + return std::set(RInfo.Dependencies.begin(), + RInfo.Dependencies.end()); +} + // SIScheduler // struct SIScheduleBlockResult