diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -791,22 +791,13 @@
   void dumpScheduledState() const;
 };
 
+unsigned getWeakLeft(const SUnit *SU, bool isTop);
+
 /// Base class for GenericScheduler. This class maintains information about
 /// scheduling candidates based on TargetSchedModel making it easy to implement
 /// heuristics for either preRA or postRA scheduling.
 class GenericSchedulerBase : public MachineSchedStrategy {
 public:
-  /// Represent the type of SchedCandidate found within a single queue.
-  /// pickNodeBidirectional depends on these listed by decreasing priority.
-  enum CandReason : uint8_t {
-    NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
-    RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
-    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
-
-#ifndef NDEBUG
-  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
-#endif
-
   /// Policy for scheduling the next instruction in the candidate's zone.
   struct CandPolicy {
     bool ReduceLatency = false;
@@ -844,6 +835,219 @@
   }
 };
 
+  struct SchedCandidate;
+  /// Represent the heuristic we used for either PreRA or PostRA scheduling.
+  struct SchedHeuristic {
+    SchedHeuristic(StringRef Name) : Name(Name) {}
+    virtual ~SchedHeuristic() {}
+
+    // Return either Cand or TryCand if one of them wins; return nullptr if
+    // neither wins.
+    virtual SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                                SchedBoundary *Zone) {
+      return nullptr;
+    }
+    // Heuristic name
+    std::string Name;
+    // The priority of this heuristic. Smaller values mean higher priority.
+    int Priority = -1;
+  };
+
+  // Bias PhysReg Defs and copies to their uses and defined respectively.
+  struct PhysRegHeuristic : public SchedHeuristic {
+    PhysRegHeuristic() : SchedHeuristic("PHYS-REG") {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override {
+      return tryLess(biasPhysReg(TryCand.SU, TryCand.AtTop),
+                     biasPhysReg(Cand.SU, Cand.AtTop), Cand, TryCand);
+    }
+  };
+
+  // Fall through to original instruction order.
+  struct NodeOrderHeuristic : public SchedHeuristic {
+    NodeOrderHeuristic() : SchedHeuristic("ORDER") {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override {
+      if (Zone && ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
+                   (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)))
+        return &TryCand;
+      return nullptr;
+    }
+  };
+
+  // Debug use only.
+  struct Only1Heuristic : public SchedHeuristic {
+    Only1Heuristic() : SchedHeuristic("ONLY1") {}
+  };
+
+  // Weak edges are for clustering and other constraints.
+  struct WeakHeuristic : public SchedHeuristic {
+    WeakHeuristic() : SchedHeuristic("WEAK") {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override {
+      if (!Zone)
+        return nullptr;
+      return tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+                     getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand);
+    }
+  };
+
+  // Prioritize instructions that read unbuffered resources by stall cycles.
+  struct StallHeuristic : public SchedHeuristic {
+    StallHeuristic() : SchedHeuristic("STALL") {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override {
+      if (!Zone)
+        return nullptr;
+      return tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+                     Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand);
+    }
+  };
+
+  // Keep clustered nodes together to encourage downstream peephole
+  // optimizations which may reduce resource requirements.
+  // This is a best effort to set things up for a post-RA pass.
+  // Optimizations like generating loads of multiple registers should ideally
+  // be done within the scheduler pass by combining the loads during DAG
+  // postprocessing.
+  struct ClusterHeuristic : public SchedHeuristic {
+    ScheduleDAGMI *&DAG;
+    ClusterHeuristic(ScheduleDAGMI *&DAG)
+        : SchedHeuristic("CLUSTER"), DAG(DAG) {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override {
+      const SUnit *CandNextClusterSU =
+          Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+      const SUnit *TryCandNextClusterSU =
+          TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+      return tryLess(TryCand.SU == TryCandNextClusterSU,
+                     Cand.SU == CandNextClusterSU, Cand, TryCand);
+    }
+  };
+
+  // For loops that are acyclic path limited, aggressively schedule for
+  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+  // heuristics to take precedence.
+  // Prefer the candidate with the lesser depth, but only if one of them has
+  // depth greater than the total latency scheduled so far, otherwise either of
+  // them could be scheduled now with no stall.
+  struct TopDepthReduceHeuristic : public SchedHeuristic {
+    bool AcyLimited = false;
+    SchedRemainder &Rem;
+    TopDepthReduceHeuristic(SchedRemainder &Rem, bool AcyLimited = false)
+        : SchedHeuristic("TOP-DEPTH"), AcyLimited(AcyLimited), Rem(Rem) {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override {
+      if (!Zone || !Zone->isTop())
+        return nullptr;
+      if (AcyLimited && (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps()))
+        return nullptr;
+      if (!AcyLimited &&
+          (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited))
+        return nullptr;
+      if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) <=
+          Zone->getScheduledLatency())
+        return nullptr;
+      return tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand,
+                     Cand);
+    }
+  };
+
+  struct TopPathReduceHeuristic : public SchedHeuristic {
+    bool AcyLimited = false;
+    SchedRemainder &Rem;
+    TopPathReduceHeuristic(SchedRemainder &Rem, bool AcyLimited = false)
+        : SchedHeuristic("TOP-PATH"), AcyLimited(AcyLimited), Rem(Rem) {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override {
+      if (!Zone || !Zone->isTop())
+        return nullptr;
+      if (AcyLimited && (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps()))
+        return nullptr;
+      if (!AcyLimited &&
+          (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited))
+        return nullptr;
+      return tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), Cand,
+                     TryCand);
+    }
+  };
+
+  // Prefer the candidate with the lesser height, but only if one of them
+  // has height greater than the total latency scheduled so far, otherwise
+  // either of them could be scheduled now with no stall.
+ struct BotHeightReduceHeuristic : public SchedHeuristic { + bool AcyLimited = false; + SchedRemainder &Rem; + BotHeightReduceHeuristic(SchedRemainder &Rem, bool AcyLimited = false) + : SchedHeuristic("BOT-HEIGHT"), AcyLimited(AcyLimited), Rem(Rem) {} + SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) override { + if (!Zone || Zone->isTop()) + return nullptr; + if (AcyLimited && (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps())) + return nullptr; + if (!AcyLimited && + (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited)) + return nullptr; + if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) <= + Zone->getScheduledLatency()) + return nullptr; + return tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, + Cand); + } + }; + + struct BotPathReduceHeuristic : public SchedHeuristic { + bool AcyLimited = false; + SchedRemainder &Rem; + BotPathReduceHeuristic(SchedRemainder &Rem, bool AcyLimited = false) + : SchedHeuristic("BOT-PATH"), AcyLimited(AcyLimited), Rem(Rem) {} + SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) override { + if (!Zone || Zone->isTop()) + return nullptr; + if (AcyLimited && (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps())) + return nullptr; + if (!AcyLimited && + (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited)) + return nullptr; + return tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), Cand, + TryCand); + } + }; + + // Avoid critical resource consumption and balance the schedule. + struct ResourceReduceHeuristic : public SchedHeuristic { + bool initResource = false; + ScheduleDAGMI *&DAG; + const TargetSchedModel *&SchedModel; + ResourceReduceHeuristic(ScheduleDAGMI *&DAG, + const TargetSchedModel *&SchedModel, + bool initResource = true) + : SchedHeuristic("RES-REDUCE"), initResource(initResource), DAG(DAG), + SchedModel(SchedModel) {} + SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) override { + if (!Zone) + return nullptr; + if (initResource) + TryCand.initResourceDelta(DAG, SchedModel); + return tryLess(TryCand.ResDelta.CritResources, + Cand.ResDelta.CritResources, TryCand, Cand); + } + }; + + struct ResourceDemandHeuristic : public SchedHeuristic { + ResourceDemandHeuristic() : SchedHeuristic("RES-DEMAND") {} + SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) override { + if (!Zone) + return nullptr; + return tryLess(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, Cand, TryCand); + } + }; + /// Store the state used by GenericScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { @@ -852,8 +1056,8 @@ // The best SUnit candidate. SUnit *SU; - // The reason for this candidate. - CandReason Reason; + // The heuristic reason for this candidate. + const SchedHeuristic *Reason = nullptr; // Whether this candidate should be scheduled at top/bottom. bool AtTop; @@ -870,7 +1074,7 @@ void reset(const CandPolicy &NewPolicy) { Policy = NewPolicy; SU = nullptr; - Reason = NoCand; + Reason = nullptr; AtTop = false; RPDelta = RegPressureDelta(); ResDelta = SchedResourceDelta(); @@ -880,7 +1084,7 @@ // Copy the status of another candidate without changing policy. 
     void setBest(SchedCandidate &Best) {
-      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+      assert(Best.Reason && "uninitialized Sched candidate");
       SU = Best.SU;
       Reason = Best.Reason;
       AtTop = Best.AtTop;
@@ -892,6 +1096,12 @@
                            const TargetSchedModel *SchedModel);
   };
 
+private:
+  // Keep all the heuristics. Note that the earlier a heuristic is pushed into
+  // the vector, the higher its priority.
+  SmallVector<SchedHeuristic *, 16> Heuristics;
+  ScheduleDAGMI *DAG = nullptr;
+
 protected:
   const MachineSchedContext *Context;
   const TargetSchedModel *SchedModel = nullptr;
@@ -899,49 +1109,94 @@
   SchedRemainder Rem;
 
-  GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}
+  // Predefined heuristics
+  PhysRegHeuristic PhysReg;
+  NodeOrderHeuristic NodeOrder;
+  Only1Heuristic Only1;
+  StallHeuristic Stall;
+  ClusterHeuristic Cluster;
+  TopDepthReduceHeuristic AcyLimitedTopDepthReduce;
+  TopPathReduceHeuristic AcyLimitedTopPathReduce;
+  BotHeightReduceHeuristic AcyLimitedBotHeightReduce;
+  BotPathReduceHeuristic AcyLimitedBotPathReduce;
+  ResourceReduceHeuristic ResourceReduce;
+  ResourceDemandHeuristic ResourceDemand;
+  TopDepthReduceHeuristic TopDepthReduce;
+  TopPathReduceHeuristic TopPathReduce;
+  BotHeightReduceHeuristic BotHeightReduce;
+  BotPathReduceHeuristic BotPathReduce;
+  WeakHeuristic Weak;
+
+  GenericSchedulerBase(const MachineSchedContext *C)
+      : Context(C), Cluster(DAG), AcyLimitedTopDepthReduce(Rem, true),
+        AcyLimitedTopPathReduce(Rem, true),
+        AcyLimitedBotHeightReduce(Rem, true),
+        AcyLimitedBotPathReduce(Rem, true),
+        ResourceReduce(DAG, SchedModel, true), TopDepthReduce(Rem),
+        TopPathReduce(Rem), BotHeightReduce(Rem), BotPathReduce(Rem) {}
 
   void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
                  SchedBoundary *OtherZone);
 
+  void initialize(ScheduleDAGMI *Dag) override;
+
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const;
+
+  bool hasHigherPriority(const SchedHeuristic *Reason1,
+                         const SchedHeuristic *Reason2) const {
+    if (!Reason1)
+      return false;
+    if (!Reason2)
+      return true;
+    assert(Reason1->Priority >= 0 && Reason2->Priority >= 0 &&
+           "Uninitialized heuristic priority");
+    return Reason1->Priority < Reason2->Priority;
+  }
+
+  void registerHeuristic(SchedHeuristic &Heuristic,
+                         SchedHeuristic *Before = nullptr) {
+    assert(!std::count(Heuristics.begin(), Heuristics.end(), &Heuristic) &&
+           "Heuristic is added multiple times");
+    if (!Before)
+      Heuristics.push_back(&Heuristic);
+    else {
+      auto Pos = std::find(Heuristics.begin(), Heuristics.end(), Before);
+      assert(Pos != Heuristics.end() &&
+             "Don't know how to register the heuristics");
+      Heuristics.insert(Pos, &Heuristic);
+    }
+  }
+
+  // Register all the heuristics.
+  virtual void registerHeuristics() {}
+
 #ifndef NDEBUG
-  void traceCandidate(const SchedCandidate &Cand);
+  virtual void traceCandidate(const SchedCandidate &Cand);
 #endif
 
+  // Utility functions used by heuristics in tryCandidate().
+  static SchedCandidate *tryLess(int TryVal, int CandVal,
+                                 SchedCandidate &TryCand, SchedCandidate &Cand);
+  static SchedCandidate *
+  tryPressure(const PressureChange &TryP, const PressureChange &CandP,
+              SchedCandidate &TryCand, SchedCandidate &Cand,
              const TargetRegisterInfo *TRI, const MachineFunction &MF);
+  static int biasPhysReg(const SUnit *SU, bool isTop);
+
 private:
   bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
                            bool ComputeRemLatency, unsigned &RemLatency) const;
 };
 
-// Utility functions used by heuristics in tryCandidate().
-bool tryLess(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason); -bool tryGreater(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason); -bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone); -bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo *TRI, - const MachineFunction &MF); -unsigned getWeakLeft(const SUnit *SU, bool isTop); -int biasPhysReg(const SUnit *SU, bool isTop); - /// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. class GenericScheduler : public GenericSchedulerBase { public: - GenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), - Bot(SchedBoundary::BotQID, "BotQ") {} + GenericScheduler(const MachineSchedContext *C) + : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), + Bot(SchedBoundary::BotQID, "BotQ"), RegExcess(DAG, TRI), + RegCritMax(DAG, TRI), RegMax(DAG, TRI) {} void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, @@ -981,6 +1236,45 @@ void registerRoots() override; + // Avoid exceeding the target's limit. + struct RegExcessHeuristic : public SchedHeuristic { + ScheduleDAGMILive *&DAG; + const TargetRegisterInfo *&TRI; + RegExcessHeuristic(ScheduleDAGMILive *&DAG, const TargetRegisterInfo *&TRI) + : SchedHeuristic("REG-EXCESS"), DAG(DAG), TRI(TRI) {} + SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) override { + return tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, + Cand, TRI, DAG->MF); + } + }; + + // Avoid increasing the max critical pressure in the scheduled region. + struct RegCritMaxHeuristic : public SchedHeuristic { + ScheduleDAGMILive *&DAG; + const TargetRegisterInfo *&TRI; + RegCritMaxHeuristic(ScheduleDAGMILive *&DAG, const TargetRegisterInfo *&TRI) + : SchedHeuristic("REG-CRIT"), DAG(DAG), TRI(TRI) {} + SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) override { + return tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, TRI, DAG->MF); + } + }; + + // Avoid increasing the max pressure of the entire region. + struct RegMaxHeuristic : public SchedHeuristic { + ScheduleDAGMILive *&DAG; + const TargetRegisterInfo *&TRI; + RegMaxHeuristic(ScheduleDAGMILive *&DAG, const TargetRegisterInfo *&TRI) + : SchedHeuristic("REG-MAX"), DAG(DAG), TRI(TRI) {} + SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) override { + return tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, + TryCand, Cand, TRI, DAG->MF); + } + }; + protected: ScheduleDAGMILive *DAG = nullptr; @@ -995,15 +1289,20 @@ /// Candidate last picked from Bot boundary. 
   SchedCandidate BotCand;
 
+  RegExcessHeuristic RegExcess;
+  RegCritMaxHeuristic RegCritMax;
+  RegMaxHeuristic RegMax;
+
+  void registerHeuristics() override;
+
   void checkAcyclicLatency();
 
   void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                      const RegPressureTracker &RPTracker,
                      RegPressureTracker &TempTracker);
 
-  virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
-                            SchedBoundary *Zone) const;
-
+#ifndef NDEBUG
+  void traceCandidate(const SchedCandidate &Cand) override;
+#endif
   SUnit *pickNodeBidirectional(bool &IsTopNode);
 
   void pickNodeFromQueue(SchedBoundary &Zone,
@@ -1025,9 +1324,12 @@
   SchedBoundary Top;
   SmallVector<SUnit*, 8> BotRoots;
 
+  ResourceReduceHeuristic ResReducePostRA;
+
 public:
-  PostGenericScheduler(const MachineSchedContext *C):
-    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
+  PostGenericScheduler(const MachineSchedContext *C)
+      : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
+        ResReducePostRA(DAG, SchedModel, false) {}
 
   ~PostGenericScheduler() override = default;
 
@@ -1064,9 +1366,11 @@
   }
 
 protected:
-  virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
-
   void pickNodeFromQueue(SchedCandidate &Cand);
+  void registerHeuristics() override;
+#ifndef NDEBUG
+  void traceCandidate(const SchedCandidate &Cand) override;
+#endif
 };
 
 /// Create the standard converging machine scheduler. This will be used as the
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -2540,6 +2540,46 @@
   }
 }
 
+void GenericSchedulerBase::initialize(ScheduleDAGMI *Dag) {
+  DAG = Dag;
+  // Register all the heuristics.
+  Heuristics.clear();
+  registerHeuristics();
+  // Assign the priority for each heuristic.
+  for (unsigned i = 0; i < Heuristics.size(); ++i)
+    Heuristics[i]->Priority = i;
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model
+/// because we don't need to evaluate all aspects of the model for each node
+/// in the queue. But it's really done to make the heuristics easier to debug
+/// and statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+/// if Cand is from a different zone than TryCand.
+void GenericSchedulerBase::tryCandidate(SchedCandidate &Cand,
+                                        SchedCandidate &TryCand,
+                                        SchedBoundary *Zone) const {
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = &NodeOrder;
+    return;
+  }
+
+  for (auto *Reason : Heuristics) {
+    if (SchedCandidate *SC = Reason->Try(Cand, TryCand, Zone)) {
+      assert((SC == &Cand || SC == &TryCand) &&
+             "Either Cand or TryCand is expected");
+      if (SC == &TryCand || hasHigherPriority(Reason, SC->Reason))
+        SC->Reason = Reason;
+      break;
+    }
+  }
+}
+
 /// Compute remaining latency. We need this both to determine whether the
 /// overall schedule has become latency-limited and whether the instructions
 /// outside this zone are resource or latency limited.
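
The hunk above is the heart of the change: candidate comparison becomes a priority-ordered walk over registered heuristics, where the first heuristic with an opinion decides the winner. Below is a minimal standalone sketch of that pattern; all names here (Heuristic, Candidate, apply) are illustrative stand-ins, not the patch's actual classes.

```cpp
#include <string>
#include <utility>
#include <vector>

struct Heuristic;

struct Candidate {
  int NodeNum = -1;
  const Heuristic *Reason = nullptr; // heuristic that last picked this node
  bool isValid() const { return NodeNum >= 0; }
};

struct Heuristic {
  std::string Name;
  int Priority = -1; // assigned at registration time; smaller wins
  explicit Heuristic(std::string N) : Name(std::move(N)) {}
  virtual ~Heuristic() = default;
  // Return the winner, or nullptr if this heuristic has no opinion.
  virtual Candidate *apply(Candidate &Cand, Candidate &TryCand) = 0;
};

// Counterpart of NodeOrderHeuristic: fall back to original program order.
struct NodeOrder : Heuristic {
  NodeOrder() : Heuristic("ORDER") {}
  Candidate *apply(Candidate &Cand, Candidate &TryCand) override {
    return TryCand.NodeNum < Cand.NodeNum ? &TryCand : nullptr;
  }
};

// Counterpart of GenericSchedulerBase::tryCandidate(): walk the heuristics in
// priority order and let the first decisive one stamp the winner's Reason.
void tryCandidate(const std::vector<Heuristic *> &Heuristics, Candidate &Cand,
                  Candidate &TryCand) {
  if (!Cand.isValid()) {                // no incumbent yet: TryCand wins
    TryCand.Reason = Heuristics.back(); // the order fallback
    return;
  }
  for (Heuristic *H : Heuristics) {
    if (Candidate *Winner = H->apply(Cand, TryCand)) {
      Winner->Reason = H; // lower-priority heuristics never run
      break;
    }
  }
}
```

The real patch adds one refinement the sketch omits: when the incumbent Cand wins, its Reason is only overwritten if the deciding heuristic outranks the reason it already carries (see hasHigherPriority above), so a candidate keeps the strongest justification it has earned so far.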
@@ -2643,71 +2683,31 @@ } #ifndef NDEBUG -const char *GenericSchedulerBase::getReasonStr( - GenericSchedulerBase::CandReason Reason) { - switch (Reason) { - case NoCand: return "NOCAND "; - case Only1: return "ONLY1 "; - case PhysReg: return "PHYS-REG "; - case RegExcess: return "REG-EXCESS"; - case RegCritical: return "REG-CRIT "; - case Stall: return "STALL "; - case Cluster: return "CLUSTER "; - case Weak: return "WEAK "; - case RegMax: return "REG-MAX "; - case ResourceReduce: return "RES-REDUCE"; - case ResourceDemand: return "RES-DEMAND"; - case TopDepthReduce: return "TOP-DEPTH "; - case TopPathReduce: return "TOP-PATH "; - case BotHeightReduce:return "BOT-HEIGHT"; - case BotPathReduce: return "BOT-PATH "; - case NextDefUse: return "DEF-USE "; - case NodeOrder: return "ORDER "; - }; - llvm_unreachable("Unknown reason!"); -} - void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { - PressureChange P; unsigned ResIdx = 0; unsigned Latency = 0; - switch (Cand.Reason) { - default: - break; - case RegExcess: - P = Cand.RPDelta.Excess; - break; - case RegCritical: - P = Cand.RPDelta.CriticalMax; - break; - case RegMax: - P = Cand.RPDelta.CurrentMax; - break; - case ResourceReduce: + + if (Cand.Reason == &ResourceReduce) ResIdx = Cand.Policy.ReduceResIdx; - break; - case ResourceDemand: + else if (Cand.Reason == &ResourceDemand) ResIdx = Cand.Policy.DemandResIdx; - break; - case TopDepthReduce: + else if (Cand.Reason == &AcyLimitedTopDepthReduce || + Cand.Reason == &TopDepthReduce) Latency = Cand.SU->getDepth(); - break; - case TopPathReduce: + else if (Cand.Reason == &AcyLimitedTopPathReduce || + Cand.Reason == &TopPathReduce) Latency = Cand.SU->getHeight(); - break; - case BotHeightReduce: + else if (Cand.Reason == &AcyLimitedBotHeightReduce || + Cand.Reason == &BotHeightReduce) Latency = Cand.SU->getHeight(); - break; - case BotPathReduce: + else if (Cand.Reason == &AcyLimitedBotPathReduce || + Cand.Reason == &BotPathReduce) Latency = Cand.SU->getDepth(); - break; - } - dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); - if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) - << ":" << P.getUnitInc() << " "; - else - dbgs() << " "; + + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " + << (Cand.Reason ? Cand.Reason->Name : "NOCAND"); + + dbgs() << " "; if (ResIdx) dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; else @@ -2720,77 +2720,21 @@ } #endif -namespace llvm { -/// Return true if this heuristic determines order. 
-bool tryLess(int TryVal, int CandVal,
-             GenericSchedulerBase::SchedCandidate &TryCand,
-             GenericSchedulerBase::SchedCandidate &Cand,
-             GenericSchedulerBase::CandReason Reason) {
-  if (TryVal < CandVal) {
-    TryCand.Reason = Reason;
-    return true;
-  }
-  if (TryVal > CandVal) {
-    if (Cand.Reason > Reason)
-      Cand.Reason = Reason;
-    return true;
-  }
-  return false;
-}
-
-bool tryGreater(int TryVal, int CandVal,
-                GenericSchedulerBase::SchedCandidate &TryCand,
-                GenericSchedulerBase::SchedCandidate &Cand,
-                GenericSchedulerBase::CandReason Reason) {
-  if (TryVal > CandVal) {
-    TryCand.Reason = Reason;
-    return true;
-  }
-  if (TryVal < CandVal) {
-    if (Cand.Reason > Reason)
-      Cand.Reason = Reason;
-    return true;
-  }
-  return false;
-}
-
-bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
-                GenericSchedulerBase::SchedCandidate &Cand,
-                SchedBoundary &Zone) {
-  if (Zone.isTop()) {
-    // Prefer the candidate with the lesser depth, but only if one of them has
-    // depth greater than the total latency scheduled so far, otherwise either
-    // of them could be scheduled now with no stall.
-    if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
-        Zone.getScheduledLatency()) {
-      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
-        return true;
-    }
-    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
-      return true;
-  } else {
-    // Prefer the candidate with the lesser height, but only if one of them has
-    // height greater than the total latency scheduled so far, otherwise either
-    // of them could be scheduled now with no stall.
-    if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
-        Zone.getScheduledLatency()) {
-      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
-        return true;
-    }
-    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
-      return true;
-  }
-  return false;
+/// Return the candidate if this heuristic determines order.
+GenericSchedulerBase::SchedCandidate *
+GenericSchedulerBase::tryLess(int TryVal, int CandVal, SchedCandidate &TryCand,
+                              SchedCandidate &Cand) {
+  if (TryVal < CandVal)
+    return &TryCand;
+  if (TryVal > CandVal)
+    return &Cand;
+  return nullptr;
 }
-} // end namespace llvm
 
-static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
+static void tracePick(const GenericSchedulerBase::SchedHeuristic *Reason,
+                      bool IsTop) {
   LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
-                    << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+                    << (Reason ? Reason->Name : "NOCAND") << '\n');
 }
 
 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
@@ -2801,6 +2745,7 @@
   assert(dag->hasVRegLiveness() &&
          "(PreRA)GenericScheduler needs vreg liveness");
   DAG = static_cast<ScheduleDAGMILive *>(dag);
+  SchedModel = DAG->getSchedModel();
   TRI = DAG->TRI;
 
@@ -2828,6 +2773,8 @@
   }
   TopCand.SU = nullptr;
   BotCand.SU = nullptr;
+
+  GenericSchedulerBase::initialize(dag);
 }
 
 /// Initialize the per-region scheduling policy.
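
Note that the old tryGreater() gets no direct replacement: heuristics that want greater-is-better semantics call the new two-way tryLess() with the candidate references swapped, as PhysRegHeuristic and ResourceDemandHeuristic do in the header above. A quick standalone sketch of why the swap is equivalent, using hypothetical stand-in types:

```cpp
#include <cassert>

struct SchedCandidate { int Id; };

// Shape of the patch's new helper: winner by smaller value, or no opinion.
static SchedCandidate *tryLess(int TryVal, int CandVal,
                               SchedCandidate &TryCand, SchedCandidate &Cand) {
  if (TryVal < CandVal)
    return &TryCand;
  if (TryVal > CandVal)
    return &Cand;
  return nullptr; // tie: defer to the next heuristic
}

// Old-style greater-is-better comparison, expressed via the new helper.
static SchedCandidate *tryGreater(int TryVal, int CandVal,
                                  SchedCandidate &TryCand,
                                  SchedCandidate &Cand) {
  // Swapping the candidate references flips which one a "less" verdict names.
  return tryLess(TryVal, CandVal, /*TryCand=*/Cand, /*Cand=*/TryCand);
}

int main() {
  SchedCandidate A{0}, B{1};
  assert(tryGreater(3, 1, A, B) == &A); // A's larger value wins
  assert(tryGreater(1, 3, A, B) == &B);
  assert(tryGreater(2, 2, A, B) == nullptr); // tie
  return 0;
}
```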
@@ -2946,32 +2893,27 @@ } } -namespace llvm { -bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) { +GenericSchedulerBase::SchedCandidate *GenericSchedulerBase::tryPressure( + const PressureChange &TryP, const PressureChange &CandP, + SchedCandidate &TryCand, SchedCandidate &Cand, + const TargetRegisterInfo *TRI, const MachineFunction &MF) { // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. - if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, - Reason)) { - return true; - } + if (auto Result = + tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, Cand, TryCand)) + return Result; + // Do not compare the magnitude of pressure changes between top and bottom // boundary. if (Cand.AtTop != TryCand.AtTop) - return false; + return nullptr; // If both candidates affect the same set in the same boundary, go with the // smallest increase. unsigned TryPSet = TryP.getPSetOrMax(); unsigned CandPSet = CandP.getPSetOrMax(); if (TryPSet == CandPSet) { - return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, - Reason); + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand); } int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : @@ -2983,12 +2925,14 @@ // If the candidates are decreasing pressure, reverse priority. if (TryP.getUnitInc() < 0) std::swap(TryRank, CandRank); - return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); + return tryLess(TryRank, CandRank, Cand, TryCand); } +namespace llvm { unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } +} // end namespace llvm /// Minimize physical register live ranges. Regalloc wants them adjacent to /// their physreg def/use. @@ -2997,7 +2941,7 @@ /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled /// with the operation that produces or consumes the physreg. We'll do this when /// regalloc has support for parallel copies. 
-int biasPhysReg(const SUnit *SU, bool isTop) { +int GenericSchedulerBase::biasPhysReg(const SUnit *SU, bool isTop) { const MachineInstr *MI = SU->getInstr(); if (MI->isCopy()) { @@ -3032,7 +2976,58 @@ return 0; } -} // end namespace llvm + +void GenericScheduler::registerHeuristics() { + registerHeuristic(PhysReg); + + if (DAG->isTrackingPressure()) { + registerHeuristic(RegExcess); + registerHeuristic(RegCritMax); + } + + registerHeuristic(AcyLimitedTopDepthReduce); + registerHeuristic(AcyLimitedTopPathReduce); + registerHeuristic(AcyLimitedBotHeightReduce); + registerHeuristic(AcyLimitedBotPathReduce); + registerHeuristic(Stall); + registerHeuristic(Cluster); + registerHeuristic(Weak); + + if (DAG->isTrackingPressure()) + registerHeuristic(RegMax); + + registerHeuristic(ResourceReduce); + registerHeuristic(ResourceDemand); + + if (!RegionPolicy.DisableLatencyHeuristic) { + registerHeuristic(TopDepthReduce); + registerHeuristic(TopPathReduce); + registerHeuristic(BotHeightReduce); + registerHeuristic(BotPathReduce); + } + + registerHeuristic(NodeOrder); +} + +#ifndef NDEBUG +void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; + if (Cand.Reason == &RegExcess) + P = Cand.RPDelta.Excess; + else if (Cand.Reason == &RegCritMax) + P = Cand.RPDelta.CriticalMax; + else if (Cand.Reason == &RegMax) + P = Cand.RPDelta.CurrentMax; + + if (P.isValid()) { + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << Cand.Reason->Name; + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; + dbgs() << '\n'; + } else + GenericSchedulerBase::traceCandidate(Cand); +} +#endif void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, @@ -3071,119 +3066,6 @@ << Cand.RPDelta.Excess.getUnitInc() << "\n"); } -/// Apply a set of heuristics to a new candidate. Heuristics are currently -/// hierarchical. This may be more efficient than a graduated cost model because -/// we don't need to evaluate all aspects of the model for each node in the -/// queue. But it's really done to make the heuristics easier to debug and -/// statistically analyze. -/// -/// \param Cand provides the policy and current best candidate. -/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -/// \param Zone describes the scheduled zone that we are extending, or nullptr -// if Cand is from a different zone than TryCand. -void GenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone) const { - // Initialize the candidate if needed. - if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; - return; - } - - // Bias PhysReg Defs and copies to their uses and defined respectively. - if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), - biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; - - // Avoid exceeding the target's limit. - if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, - Cand.RPDelta.Excess, - TryCand, Cand, RegExcess, TRI, - DAG->MF)) - return; - - // Avoid increasing the max critical pressure in the scheduled region. - if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, - Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical, TRI, - DAG->MF)) - return; - - // We only compare a subset of features when comparing nodes between - // Top and Bottom boundary. 
Some properties are simply incomparable, in many - // other instances we should only override the other boundary if something - // is a clear good pick on one boundary. Skip heuristics that are more - // "tie-breaking" in nature. - bool SameBoundary = Zone != nullptr; - if (SameBoundary) { - // For loops that are acyclic path limited, aggressively schedule for - // latency. Within an single cycle, whenever CurrMOps > 0, allow normal - // heuristics to take precedence. - if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && - tryLatency(TryCand, Cand, *Zone)) - return; - - // Prioritize instructions that read unbuffered resources by stall cycles. - if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), - Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - } - - // Keep clustered nodes together to encourage downstream peephole - // optimizations which may reduce resource requirements. - // - // This is a best effort to set things up for a post-RA pass. Optimizations - // like generating loads of multiple registers should ideally be done within - // the scheduler pass by combining the loads during DAG postprocessing. - const SUnit *CandNextClusterSU = - Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - const SUnit *TryCandNextClusterSU = - TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - if (tryGreater(TryCand.SU == TryCandNextClusterSU, - Cand.SU == CandNextClusterSU, - TryCand, Cand, Cluster)) - return; - - if (SameBoundary) { - // Weak edges are for clustering and other constraints. - if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), - getWeakLeft(Cand.SU, Cand.AtTop), - TryCand, Cand, Weak)) - return; - } - - // Avoid increasing the max pressure of the entire region. - if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, - Cand.RPDelta.CurrentMax, - TryCand, Cand, RegMax, TRI, - DAG->MF)) - return; - - if (SameBoundary) { - // Avoid critical resource consumption and balance the schedule. - TryCand.initResourceDelta(DAG, SchedModel); - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, - TryCand, Cand, ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - // For acyclic path limited loops, latency was already checked above. - if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && - !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; - - // Fall through to original instruction order. - if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) - || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { - TryCand.Reason = NodeOrder; - } - } -} - /// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during @@ -3204,7 +3086,7 @@ // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; tryCandidate(Cand, TryCand, ZoneArg); - if (TryCand.Reason != NoCand) { + if (TryCand.Reason) { // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); @@ -3220,12 +3102,12 @@ // efficient, but also provides the best heuristics for CriticalPSets. 
if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - tracePick(Only1, false); + tracePick(&Only1, false); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - tracePick(Only1, true); + tracePick(&Only1, true); return SU; } // Set the bottom-up policy based on the state of the current bottom zone and @@ -3243,7 +3125,7 @@ BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + assert(BotCand.Reason && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(BotCand)); #ifndef NDEBUG @@ -3263,7 +3145,7 @@ TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + assert(TopCand.Reason && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(TopCand)); #ifndef NDEBUG @@ -3281,9 +3163,9 @@ assert(BotCand.isValid()); assert(TopCand.isValid()); SchedCandidate Cand = BotCand; - TopCand.Reason = NoCand; + TopCand.Reason = nullptr; tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { + if (TopCand.Reason) { Cand.setBest(TopCand); LLVM_DEBUG(traceCandidate(Cand)); } @@ -3308,7 +3190,7 @@ CandPolicy NoPolicy; TopCand.reset(NoPolicy); pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find a candidate"); + assert(TopCand.Reason && "failed to find a candidate"); tracePick(TopCand); SU = TopCand.SU; } @@ -3319,7 +3201,7 @@ CandPolicy NoPolicy; BotCand.reset(NoPolicy); pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find a candidate"); + assert(BotCand.Reason && "failed to find a candidate"); tracePick(BotCand); SU = BotCand.SU; } @@ -3427,7 +3309,38 @@ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( Itin, DAG); } + GenericSchedulerBase::initialize(Dag); +} + +void PostGenericScheduler::registerHeuristics() { + registerHeuristic(Stall); + registerHeuristic(Cluster); + registerHeuristic(ResReducePostRA); + registerHeuristic(ResourceDemand); + registerHeuristic(TopDepthReduce); + registerHeuristic(TopPathReduce); + registerHeuristic(BotHeightReduce); + registerHeuristic(BotPathReduce); + registerHeuristic(NodeOrder); +} + +#ifndef NDEBUG +void PostGenericScheduler::traceCandidate(const SchedCandidate &Cand) { + unsigned ResIdx = 0; + if (Cand.Reason == &ResReducePostRA) { + ResIdx = Cand.Policy.ReduceResIdx; + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << Cand.Reason->Name; + dbgs() << " "; + if (ResIdx) + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + dbgs() << " "; + dbgs() << '\n'; + } else + GenericSchedulerBase::traceCandidate(Cand); } +#endif void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); @@ -3443,48 +3356,6 @@ } } -/// Apply a set of heuristics to a new candidate for PostRA scheduling. -/// -/// \param Cand provides the policy and current best candidate. -/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand) { - // Initialize the candidate if needed. 
- if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; - return; - } - - // Prioritize instructions that read unbuffered resources by stall cycles. - if (tryLess(Top.getLatencyStallCycles(TryCand.SU), - Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - - // Keep clustered nodes together. - if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), - Cand.SU == DAG->getNextClusterSucc(), - TryCand, Cand, Cluster)) - return; - - // Avoid critical resource consumption and balance the schedule. - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, - TryCand, Cand, ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; - } - - // Fall through to original instruction order. - if (TryCand.SU->NodeNum < Cand.SU->NodeNum) - TryCand.Reason = NodeOrder; -} - void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { ReadyQueue &Q = Top.Available; for (SUnit *SU : Q) { @@ -3492,8 +3363,8 @@ TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); - tryCandidate(Cand, TryCand); - if (TryCand.Reason != NoCand) { + tryCandidate(Cand, TryCand, &Top); + if (TryCand.Reason) { Cand.setBest(TryCand); LLVM_DEBUG(traceCandidate(Cand)); } @@ -3510,7 +3381,7 @@ do { SU = Top.pickOnlyChoice(); if (SU) { - tracePick(Only1, true); + tracePick(&Only1, true); } else { CandPolicy NoPolicy; SchedCandidate TopCand(NoPolicy); @@ -3518,7 +3389,7 @@ // the instructions outside the zone, including the bottom zone. setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); pickNodeFromQueue(TopCand); - assert(TopCand.Reason != NoCand && "failed to find a candidate"); + assert(TopCand.Reason && "failed to find a candidate"); tracePick(TopCand); SU = TopCand.SU; } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -158,7 +158,7 @@ // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); - if (TryCand.Reason != NoCand) { + if (TryCand.Reason) { // Initialize resource delta if needed in case future heuristics query it. 
       if (TryCand.ResDelta == SchedResourceDelta())
         TryCand.initResourceDelta(Zone.DAG, SchedModel);
@@ -196,7 +196,7 @@
       BotCand.Policy != BotPolicy) {
     BotCand.reset(CandPolicy());
     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
-    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+    assert(BotCand.Reason && "failed to find the first candidate");
   } else {
     LLVM_DEBUG(traceCandidate(BotCand));
 #ifndef NDEBUG
@@ -216,7 +216,7 @@
       TopCand.Policy != TopPolicy) {
     TopCand.reset(CandPolicy());
     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
-    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+    assert(TopCand.Reason && "failed to find the first candidate");
   } else {
     LLVM_DEBUG(traceCandidate(TopCand));
 #ifndef NDEBUG
@@ -234,9 +234,9 @@
   LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
              dbgs() << "Bot Cand: "; traceCandidate(BotCand););
   SchedCandidate Cand = BotCand;
-  TopCand.Reason = NoCand;
+  TopCand.Reason = nullptr;
   GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
-  if (TopCand.Reason != NoCand) {
+  if (TopCand.Reason) {
     Cand.setBest(TopCand);
   }
   LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
@@ -261,7 +261,7 @@
       CandPolicy NoPolicy;
       TopCand.reset(NoPolicy);
       pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
-      assert(TopCand.Reason != NoCand && "failed to find a candidate");
+      assert(TopCand.Reason && "failed to find a candidate");
       SU = TopCand.SU;
     }
     IsTopNode = true;
@@ -271,7 +271,7 @@
       CandPolicy NoPolicy;
       BotCand.reset(NoPolicy);
       pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
-      assert(BotCand.Reason != NoCand && "failed to find a candidate");
+      assert(BotCand.Reason && "failed to find a candidate");
       SU = BotCand.SU;
     }
     IsTopNode = false;
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -20,31 +20,44 @@
 /// A MachineSchedStrategy implementation for PowerPC pre RA scheduling.
 class PPCPreRASchedStrategy : public GenericScheduler {
 public:
-  PPCPreRASchedStrategy(const MachineSchedContext *C) :
-    GenericScheduler(C) {}
+  PPCPreRASchedStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}
+
 protected:
-  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
-                    SchedBoundary *Zone) const override;
-private:
-  bool biasAddiLoadCandidate(SchedCandidate &Cand,
-                             SchedCandidate &TryCand,
-                             SchedBoundary &Zone) const;
+  // There are some benefits to scheduling the ADDI before the load to hide
+  // the latency, as RA may create a true dependency between the load and the
+  // addi.
+  struct BiasAddiLoadHeuristic : public SchedHeuristic {
+    BiasAddiLoadHeuristic() : SchedHeuristic("ADDI-LOAD") {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override;
+  };
+  BiasAddiLoadHeuristic BiasAddiLoad;
+
+  void registerHeuristics() override;
 };
 
 /// A MachineSchedStrategy implementation for PowerPC post RA scheduling.
 class PPCPostRASchedStrategy : public PostGenericScheduler {
 public:
-  PPCPostRASchedStrategy(const MachineSchedContext *C) :
-    PostGenericScheduler(C) {}
+  PPCPostRASchedStrategy(const MachineSchedContext *C)
+      : PostGenericScheduler(C) {}
 
 protected:
+  // There are some benefits to scheduling the ADDI as early as possible
+  // post-RA, to avoid being stalled by vector instructions that take up all
+  // the HW units.
+  // Also, ADDI is usually used to post-increment the loop induction
+  // variable, which matters for performance.
+  struct BiasAddiHeuristic : public SchedHeuristic {
+    BiasAddiHeuristic() : SchedHeuristic("BIAS-ADDI") {}
+    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
+                        SchedBoundary *Zone) override;
+  };
+  BiasAddiHeuristic BiasAddi;
+
   void initialize(ScheduleDAGMI *Dag) override;
   SUnit *pickNode(bool &IsTopNode) override;
   void enterMBB(MachineBasicBlock *MBB) override;
   void leaveMBB() override;
-
-  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
-  bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const;
+  void registerHeuristics() override;
 };
 } // end namespace llvm
 
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -26,99 +26,42 @@
   Cand.SU->getInstr()->getOpcode() == PPC::ADDI8;
 }
 
-static void pickTryCand(GenericScheduler::SchedCandidate &Cand,
-                        GenericScheduler::SchedCandidate &TryCand) {
-  // Update the reason if we have already selected the TryCand.
-  if (TryCand.Reason != GenericScheduler::NoCand) {
-    if (TryCand.Reason > GenericScheduler::Stall)
-      TryCand.Reason = GenericScheduler::Stall;
-  } else {
-    // TryCand is not selected by heuristic and Cand is selected. If it is
-    // NodeOrder, we can still select the TryCand.
-    if (Cand.Reason == GenericScheduler::NoCand ||
-        Cand.Reason == GenericScheduler::NodeOrder)
-      TryCand.Reason = GenericScheduler::Stall;
-  }
-}
-
-static void pickCand(GenericScheduler::SchedCandidate &Cand,
-                     GenericScheduler::SchedCandidate &TryCand) {
-  if (TryCand.Reason == GenericScheduler::NoCand) {
-    // TryCand is not selected by heuristic. Update the reason for Cand.
-    if (Cand.Reason > GenericScheduler::Stall)
-      Cand.Reason = GenericScheduler::Stall;
-  } else if (TryCand.Reason == GenericScheduler::NodeOrder) {
-    // TryCand is selected but it is because of the NodeOrder heuristic. We can
-    // still select it as Cand.
-    TryCand.Reason = GenericScheduler::NoCand;
-    Cand.Reason = GenericScheduler::Stall;
-  }
-}
-
-bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+GenericScheduler::SchedCandidate *
+PPCPreRASchedStrategy::BiasAddiLoadHeuristic::Try(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
-                                                  SchedBoundary &Zone) const {
-  if (DisableAddiLoadHeuristic)
-    return false;
-
-  SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
-  SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
-  if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
-    pickTryCand(Cand, TryCand);
-    return true;
-  }
-  if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
-    pickCand(Cand, TryCand);
-    return true;
-  }
-
-  return false;
+                                                  SchedBoundary *Zone) {
+  if (!Zone)
+    return nullptr;
+  SchedCandidate &FirstCand = Zone->isTop() ? TryCand : Cand;
+  SchedCandidate &SecondCand = Zone->isTop() ? Cand : TryCand;
+  if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad())
+    return &TryCand;
+  if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand))
+    return &Cand;
+  return nullptr;
 }
 
-void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
-                                         SchedCandidate &TryCand,
-                                         SchedBoundary *Zone) const {
-  GenericScheduler::tryCandidate(Cand, TryCand, Zone);
-
-  if (!Cand.isValid() || !Zone)
-    return;
-
-  // There are some benefits to schedule the ADDI before the load to hide the
-  // latency, as RA may create a true dependency between the load and addi.
-  if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
-    return;
+void PPCPreRASchedStrategy::registerHeuristics() {
+  GenericScheduler::registerHeuristics();
+  if (!DisableAddiLoadHeuristic)
+    registerHeuristic(BiasAddiLoad, &NodeOrder);
 }
 
-bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
-                                               SchedCandidate &TryCand) const {
-  if (!EnableAddiHeuristic)
-    return false;
-
-  if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
-    pickTryCand(Cand, TryCand);
-    return true;
-  }
-
-  if (isADDIInstr(Cand) && !isADDIInstr(TryCand)) {
-    pickCand(Cand, TryCand);
-    return true;
-  }
-  return false;
+GenericScheduler::SchedCandidate *
+PPCPostRASchedStrategy::BiasAddiHeuristic::Try(SchedCandidate &Cand,
+                                               SchedCandidate &TryCand,
+                                               SchedBoundary *Zone) {
+  if (isADDIInstr(TryCand) && !isADDIInstr(Cand))
+    return &TryCand;
+  if (isADDIInstr(Cand) && !isADDIInstr(TryCand))
+    return &Cand;
+  return nullptr;
 }
 
-void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
-                                          SchedCandidate &TryCand) {
-  PostGenericScheduler::tryCandidate(Cand, TryCand);
-
-  if (!Cand.isValid())
-    return;
-
-  // There are some benefits to schedule the ADDI as early as possible post ra
-  // to avoid stalled by vector instructions which take up all the hw units.
-  // And ADDI is usually used to post inc the loop indvar, which matters the
-  // performance.
-  if (biasAddiCandidate(Cand, TryCand))
-    return;
+void PPCPostRASchedStrategy::registerHeuristics() {
+  PostGenericScheduler::registerHeuristics();
+  if (EnableAddiHeuristic)
+    registerHeuristic(BiasAddi, &NodeOrder);
 }
 
 void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
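
The PPC changes above show the intended extension point: a target defines a SchedHeuristic subclass and splices it into the priority chain with registerHeuristic(), instead of overriding tryCandidate() and re-deriving reason bookkeeping with helpers like the deleted pickTryCand()/pickCand(). Below is a hedged sketch of what a hypothetical out-of-tree strategy might look like; MyTargetPreRAStrategy, CheapOpHeuristic, and isCheap() are invented for illustration and are not part of the patch.

```cpp
// Hypothetical out-of-tree strategy (illustration only; not part of the
// patch). Assumes the MachineScheduler.h interfaces added above.
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class MyTargetPreRAStrategy : public GenericScheduler {
public:
  MyTargetPreRAStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}

protected:
  // Invented heuristic: prefer whichever candidate the target deems cheap.
  struct CheapOpHeuristic : public SchedHeuristic {
    CheapOpHeuristic() : SchedHeuristic("CHEAP-OP") {}
    // Placeholder predicate; a real target would inspect SU->getInstr().
    static bool isCheap(const SUnit *SU) { return false; }
    SchedCandidate *Try(SchedCandidate &Cand, SchedCandidate &TryCand,
                        SchedBoundary *Zone) override {
      bool TryCheap = isCheap(TryCand.SU);
      bool CandCheap = isCheap(Cand.SU);
      if (TryCheap == CandCheap)
        return nullptr; // no opinion: defer to lower-priority heuristics
      return TryCheap ? &TryCand : &Cand;
    }
  };
  CheapOpHeuristic CheapOp;

  void registerHeuristics() override {
    // Take the generic ordering, then splice the target heuristic in just
    // before the NodeOrder fallback, exactly as the PPC strategies do above.
    GenericScheduler::registerHeuristics();
    registerHeuristic(CheapOp, &NodeOrder);
  }
};

} // end namespace llvm
```

Registering relative to NodeOrder keeps the target bias as a late tie-breaker: every generic correctness- and pressure-oriented heuristic still gets the first word, and the custom preference only decides otherwise-equivalent picks.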