diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -796,17 +796,6 @@ /// heuristics for either preRA or postRA scheduling. class GenericSchedulerBase : public MachineSchedStrategy { public: - /// Represent the type of SchedCandidate found within a single queue. - /// pickNodeBidirectional depends on these listed by decreasing priority. - enum CandReason : uint8_t { - NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak, - RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; - -#ifndef NDEBUG - static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); -#endif - /// Policy for scheduling the next instruction in the candidate's zone. struct CandPolicy { bool ReduceLatency = false; @@ -844,6 +833,25 @@ } }; + struct SchedCandidate; + /// Represent the heuristic we used for either PreRA or PostRA scheduling. + struct SchedHeuristic { + // Heuristic name + std::string Name; + // Return either the Cand or TryCand if either wins, otherwise return + // nullptr if none wins. + std::function + Try; + }; +#define INIT_HEURISTIC(NAME, FUNC) INIT_HEURISTIC_FULL(NAME, &, FUNC) +#define INIT_HEURISTIC_FULL(NAME, CAPTURE, FUNC) \ + { \ + NAME, \ + [CAPTURE](SchedCandidate & Cand, SchedCandidate & TryCand, \ + SchedBoundary * Zone) -> SchedCandidate *FUNC \ + } + /// Store the state used by GenericScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { @@ -852,8 +860,8 @@ // The best SUnit candidate. SUnit *SU; - // The reason for this candidate. - CandReason Reason; + // The heuristic reason for this candidate. + const SchedHeuristic *Reason = nullptr; // Whether this candidate should be scheduled at top/bottom. 
bool AtTop; @@ -870,7 +878,7 @@ void reset(const CandPolicy &NewPolicy) { Policy = NewPolicy; SU = nullptr; - Reason = NoCand; + Reason = nullptr; AtTop = false; RPDelta = RegPressureDelta(); ResDelta = SchedResourceDelta(); @@ -880,7 +888,7 @@ // Copy the status of another candidate without changing policy. void setBest(SchedCandidate &Best) { - assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + assert(Best.Reason && "uninitialized Sched candidate"); SU = Best.SU; Reason = Best.Reason; AtTop = Best.AtTop; @@ -892,6 +900,13 @@ const TargetSchedModel *SchedModel); }; +private: + // Keep all the heuristics. Notice that, the earlier it is pushed into the + // vector, the higher priority it has. + SmallVector Heuristics; + + ScheduleDAGMI *DAG = nullptr; + protected: const MachineSchedContext *Context; const TargetSchedModel *SchedModel = nullptr; @@ -899,39 +914,114 @@ SchedRemainder Rem; - GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {} + // Predefined heuristics + // Bias PhysReg Defs and copies to their uses and defined respectively. + SchedHeuristic PhysReg; + // Fall through to original instruction order. + SchedHeuristic NodeOrder; + // Fake heuristic for debug only + SchedHeuristic Only1; + // Prioritize instructions that read unbuffered resources by stall cycles. + SchedHeuristic Stall; + // Keep clustered nodes together to encourage downstream peephole + // optimizations which may reduce resource requirements. + // This is a best effort to set things up for a post-RA pass. + // Optimizations like generating loads of multiple registers should ideally + // be done within the scheduler pass by combining the loads during DAG + // postprocessing. + SchedHeuristic Cluster; + // For loops that are acyclic path limited, aggressively schedule for latency. + // Within an single cycle, whenever CurrMOps > 0, allow normal heuristics to + // take precedence. 
+ SchedHeuristic AcyLimitedTopDepthReduce; + SchedHeuristic AcyLimitedTopPathReduce; + SchedHeuristic AcyLimitedBotHeightReduce; + SchedHeuristic AcyLimitedBotPathReduce; + // Avoid critical resource consumption and balance the schedule. + SchedHeuristic ResourceReduce; + SchedHeuristic ResourceDemand; + // Prefer the candidate with the lesser depth, but only if one of them has + // depth greater than the total latency scheduled so far, otherwise either of + // them could be scheduled now with no stall. + SchedHeuristic TopDepthReduce; + SchedHeuristic TopPathReduce; + // Prefer the candidate with the lesser height, but only if one of them + // has height greater than the total latency scheduled so far, otherwise + // either of them could be scheduled now with no stall. + SchedHeuristic BotHeightReduce; + SchedHeuristic BotPathReduce; + // Weak edges are for clustering and other constraints. + SchedHeuristic Weak; + + GenericSchedulerBase(const MachineSchedContext *C) : Context(C) { + initializeHeuristics(); + } void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone); + void initialize(ScheduleDAGMI *Dag) override { + DAG = Dag; + Heuristics.clear(); + registerHeuristics(); + } + + void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const; + + bool hasHigherPriority(const SchedHeuristic *Reason1, + const SchedHeuristic *Reason2) const { + if (!Reason1) + return false; + if (!Reason2) + return true; + assert(std::count(Heuristics.begin(), Heuristics.end(), Reason1) && + std::count(Heuristics.begin(), Heuristics.end(), Reason2) && + "Unable to compare the priority for these two heuristics"); + return std::find(Heuristics.begin(), Heuristics.end(), Reason1) < + std::find(Heuristics.begin(), Heuristics.end(), Reason2); + } + + void registerHeuristic(SchedHeuristic &Heuristic, + SchedHeuristic *Before = nullptr) { + assert(!std::count(Heuristics.begin(), Heuristics.end(), &Heuristic) && + 
"Heuristic is added multiple times"); + if (!Before) + Heuristics.push_back(&Heuristic); + else { + auto Pos = std::find(Heuristics.begin(), Heuristics.end(), Before); + assert(Pos != Heuristics.end() && + "Don't know how to register the heuristics"); + Heuristics.insert(Pos, &Heuristic); + } + } + + // Register all the heuristics. + virtual void registerHeuristics(){}; + #ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand); + virtual void traceCandidate(const SchedCandidate &Cand); #endif private: bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone, bool ComputeRemLatency, unsigned &RemLatency) const; + void initializeHeuristics(); }; // Utility functions used by heuristics in tryCandidate(). -bool tryLess(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason); -bool tryGreater(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason); -bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone); -bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo *TRI, - const MachineFunction &MF); +GenericSchedulerBase::SchedCandidate * +tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand); +GenericSchedulerBase::SchedCandidate * +tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand); +GenericSchedulerBase::SchedCandidate * +tryPressure(const PressureChange &TryP, const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate 
&TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + const TargetRegisterInfo *TRI, const MachineFunction &MF); unsigned getWeakLeft(const SUnit *SU, bool isTop); int biasPhysReg(const SUnit *SU, bool isTop); @@ -939,9 +1029,11 @@ /// the schedule. class GenericScheduler : public GenericSchedulerBase { public: - GenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), - Bot(SchedBoundary::BotQID, "BotQ") {} + GenericScheduler(const MachineSchedContext *C) + : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), + Bot(SchedBoundary::BotQID, "BotQ") { + initializeHeuristics(); + } void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, @@ -981,6 +1073,9 @@ void registerRoots() override; +private: + void initializeHeuristics(); + protected: ScheduleDAGMILive *DAG = nullptr; @@ -995,15 +1090,23 @@ /// Candidate last picked from Bot boundary. SchedCandidate BotCand; + // Avoid exceeding the target's limit. + SchedHeuristic RegExcess; + // Avoid increasing the max critical pressure in the scheduled region. + SchedHeuristic RegCritMax; + // Avoid increasing the max pressure of the entire region. 
+ SchedHeuristic RegMax; + + void registerHeuristics() override; void checkAcyclicLatency(); void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, - SchedBoundary *Zone) const; - +#ifndef NDEBUG + void traceCandidate(const SchedCandidate &Cand) override; +#endif SUnit *pickNodeBidirectional(bool &IsTopNode); void pickNodeFromQueue(SchedBoundary &Zone, @@ -1025,9 +1128,13 @@ SchedBoundary Top; SmallVector BotRoots; + SchedHeuristic ResReducePostRA; + public: - PostGenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} + PostGenericScheduler(const MachineSchedContext *C) + : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") { + initializeHeuristics(); + } ~PostGenericScheduler() override = default; @@ -1063,10 +1170,12 @@ BotRoots.push_back(SU); } -protected: - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); +private: + void initializeHeuristics(); +protected: void pickNodeFromQueue(SchedCandidate &Cand); + void registerHeuristics() override; }; /// Create the standard converging machine scheduler. 
This will be used as the diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2540,6 +2540,170 @@ } } +void GenericSchedulerBase::initializeHeuristics() { + PhysReg = INIT_HEURISTIC("PHYS-REG", { + return tryLess(biasPhysReg(TryCand.SU, TryCand.AtTop), + biasPhysReg(Cand.SU, Cand.AtTop), Cand, TryCand); + }); + + NodeOrder = INIT_HEURISTIC("ORDER", { + if (Zone && ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || + (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum))) + return &TryCand; + return nullptr; + }); + + Only1 = INIT_HEURISTIC("ONLY1", { return nullptr; }); + + Weak = INIT_HEURISTIC("WEAK", { + if (!Zone) + return nullptr; + return tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), + getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand); + }); + + Stall = INIT_HEURISTIC("STALL", { + if (!Zone) + return nullptr; + return tryLess(Zone->getLatencyStallCycles(TryCand.SU), + Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand); + }); + + Cluster = INIT_HEURISTIC("CLUSTER", { + const SUnit *CandNextClusterSU = + Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + const SUnit *TryCandNextClusterSU = + TryCand.AtTop ? 
DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + return tryLess(TryCand.SU == TryCandNextClusterSU, + Cand.SU == CandNextClusterSU, Cand, TryCand); + }); + + AcyLimitedTopDepthReduce = INIT_HEURISTIC_FULL("TOP-DEPTH", this, { + if (!Zone || !Zone->isTop()) + return nullptr; + if (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps()) + return nullptr; + if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) <= + Zone->getScheduledLatency()) + return nullptr; + return tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand, Cand); + }); + + AcyLimitedTopPathReduce = INIT_HEURISTIC_FULL("TOP-PATH", this, { + if (!Zone || !Zone->isTop()) + return nullptr; + if (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps()) + return nullptr; + return tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), Cand, + TryCand); + }); + + AcyLimitedBotHeightReduce = INIT_HEURISTIC_FULL("BOT-HEIGHT", this, { + if (!Zone || Zone->isTop()) + return nullptr; + if (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps()) + return nullptr; + if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) <= + Zone->getScheduledLatency()) + return nullptr; + return tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, + Cand); + }); + + AcyLimitedBotPathReduce = INIT_HEURISTIC_FULL("BOT-PATH", this, { + if (!Zone || Zone->isTop()) + return nullptr; + if (!Rem.IsAcyclicLatencyLimited || Zone->getCurrMOps()) + return nullptr; + return tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), Cand, TryCand); + }); + + ResourceReduce = INIT_HEURISTIC_FULL("RES-REDUCE", this, { + if (!Zone) + return nullptr; + TryCand.initResourceDelta(DAG, SchedModel); + return tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand); + }); + + ResourceDemand = INIT_HEURISTIC("RES-DEMAND", { + if (!Zone) + return nullptr; + return tryLess(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, Cand, TryCand); + }); + + TopDepthReduce = 
INIT_HEURISTIC_FULL("TOP-DEPTH", this, { + if (!Zone || !Zone->isTop()) + return nullptr; + if (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited) + return nullptr; + if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) <= + Zone->getScheduledLatency()) + return nullptr; + return tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand, Cand); + }); + + TopPathReduce = INIT_HEURISTIC_FULL("TOP-PATH", this, { + if (!Zone || !Zone->isTop()) + return nullptr; + if (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited) + return nullptr; + return tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), Cand, + TryCand); + }); + + BotHeightReduce = INIT_HEURISTIC_FULL("BOT-HEIGHT", this, { + if (!Zone || Zone->isTop()) + return nullptr; + if (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited) + return nullptr; + if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) <= + Zone->getScheduledLatency()) + return nullptr; + return tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, + Cand); + }); + + BotPathReduce = INIT_HEURISTIC_FULL("BOT-PATH", this, { + if (!Zone || Zone->isTop()) + return nullptr; + if (!TryCand.Policy.ReduceLatency || Rem.IsAcyclicLatencyLimited) + return nullptr; + return tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), Cand, TryCand); + }); +} + +/// Apply a set of heuristics to a new candidate. Heuristics are currently +/// hierarchical. This may be more efficient than a graduated cost model because +/// we don't need to evaluate all aspects of the model for each node in the +/// queue. But it's really done to make the heuristics easier to debug and +/// statistically analyze. +/// +/// \param Cand provides the policy and current best candidate. +/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. +/// \param Zone describes the scheduled zone that we are extending, or nullptr +// if Cand is from a different zone than TryCand. 
+void GenericSchedulerBase::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) const { + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = &NodeOrder; + return; + } + + for (auto Reason : Heuristics) { + if (SchedCandidate *SC = Reason->Try(Cand, TryCand, Zone)) { + assert((SC == &Cand || SC == &TryCand) && + "Either Cand or TryCand is expected"); + if (SC == &TryCand || hasHigherPriority(Reason, SC->Reason)) + SC->Reason = Reason; + break; + } + } +} + /// Compute remaining latency. We need this both to determine whether the /// overall schedule has become latency-limited and whether the instructions /// outside this zone are resource or latency limited. @@ -2643,71 +2807,28 @@ } #ifndef NDEBUG -const char *GenericSchedulerBase::getReasonStr( - GenericSchedulerBase::CandReason Reason) { - switch (Reason) { - case NoCand: return "NOCAND "; - case Only1: return "ONLY1 "; - case PhysReg: return "PHYS-REG "; - case RegExcess: return "REG-EXCESS"; - case RegCritical: return "REG-CRIT "; - case Stall: return "STALL "; - case Cluster: return "CLUSTER "; - case Weak: return "WEAK "; - case RegMax: return "REG-MAX "; - case ResourceReduce: return "RES-REDUCE"; - case ResourceDemand: return "RES-DEMAND"; - case TopDepthReduce: return "TOP-DEPTH "; - case TopPathReduce: return "TOP-PATH "; - case BotHeightReduce:return "BOT-HEIGHT"; - case BotPathReduce: return "BOT-PATH "; - case NextDefUse: return "DEF-USE "; - case NodeOrder: return "ORDER "; - }; - llvm_unreachable("Unknown reason!"); -} - void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { - PressureChange P; unsigned ResIdx = 0; unsigned Latency = 0; - switch (Cand.Reason) { - default: - break; - case RegExcess: - P = Cand.RPDelta.Excess; - break; - case RegCritical: - P = Cand.RPDelta.CriticalMax; - break; - case RegMax: - P = Cand.RPDelta.CurrentMax; - break; - case ResourceReduce: - ResIdx = Cand.Policy.ReduceResIdx; - break; - 
case ResourceDemand: - ResIdx = Cand.Policy.DemandResIdx; - break; - case TopDepthReduce: - Latency = Cand.SU->getDepth(); - break; - case TopPathReduce: - Latency = Cand.SU->getHeight(); - break; - case BotHeightReduce: - Latency = Cand.SU->getHeight(); - break; - case BotPathReduce: - Latency = Cand.SU->getDepth(); - break; - } - dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); - if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) - << ":" << P.getUnitInc() << " "; - else - dbgs() << " "; + if (Cand.Reason) { + if (Cand.Reason->Name == "RES-REDUCE") + ResIdx = Cand.Policy.ReduceResIdx; + else if (Cand.Reason->Name == "RES-DEMAND") + ResIdx = Cand.Policy.DemandResIdx; + else if (Cand.Reason->Name == "TOP-DEPTH") + Latency = Cand.SU->getDepth(); + else if (Cand.Reason->Name == "TOP-PATH") + Latency = Cand.SU->getHeight(); + else if (Cand.Reason->Name == "BOT-HEIGHT") + Latency = Cand.SU->getHeight(); + else if (Cand.Reason->Name == "BOT-PATH") + Latency = Cand.SU->getDepth(); + } + + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " + << (Cand.Reason ? Cand.Reason->Name : "NOCAND "); + + dbgs() << " "; if (ResIdx) dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; else @@ -2721,76 +2842,22 @@ #endif namespace llvm { -/// Return true if this heuristic determines order. 
-bool tryLess(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { - if (TryVal < CandVal) { - TryCand.Reason = Reason; - return true; - } - if (TryVal > CandVal) { - if (Cand.Reason > Reason) - Cand.Reason = Reason; - return true; - } - return false; -} - -bool tryGreater(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { - if (TryVal > CandVal) { - TryCand.Reason = Reason; - return true; - } - if (TryVal < CandVal) { - if (Cand.Reason > Reason) - Cand.Reason = Reason; - return true; - } - return false; -} - -bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone) { - if (Zone.isTop()) { - // Prefer the candidate with the lesser depth, but only if one of them has - // depth greater than the total latency scheduled so far, otherwise either - // of them could be scheduled now with no stall. - if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) > - Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) - return true; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericSchedulerBase::TopPathReduce)) - return true; - } else { - // Prefer the candidate with the lesser height, but only if one of them has - // height greater than the total latency scheduled so far, otherwise either - // of them could be scheduled now with no stall. 
- if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) > - Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) - return true; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericSchedulerBase::BotPathReduce)) - return true; - } - return false; +/// Return the candidate if this heuristic determines order. +GenericSchedulerBase::SchedCandidate * +tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand) { + if (TryVal < CandVal) + return &TryCand; + if (TryVal > CandVal) + return &Cand; + return nullptr; } } // end namespace llvm -static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { +static void tracePick(const GenericSchedulerBase::SchedHeuristic *Reason, + bool IsTop) { LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << GenericSchedulerBase::getReasonStr(Reason) << '\n'); + << (Reason ? Reason->Name : "NOCAND ") << '\n'); } static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { @@ -2828,6 +2895,8 @@ } TopCand.SU = nullptr; BotCand.SU = nullptr; + + GenericSchedulerBase::initialize(dag); } /// Initialize the per-region scheduling policy. @@ -2947,31 +3016,28 @@ } namespace llvm { -bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) { +GenericSchedulerBase::SchedCandidate * +tryPressure(const PressureChange &TryP, const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + const TargetRegisterInfo *TRI, const MachineFunction &MF) { // If one candidate decreases and the other increases, go with it. 
// Invalid candidates have UnitInc==0. - if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, - Reason)) { - return true; - } + if (auto Result = + tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, Cand, TryCand)) + return Result; + // Do not compare the magnitude of pressure changes between top and bottom // boundary. if (Cand.AtTop != TryCand.AtTop) - return false; + return nullptr; // If both candidates affect the same set in the same boundary, go with the // smallest increase. unsigned TryPSet = TryP.getPSetOrMax(); unsigned CandPSet = CandP.getPSetOrMax(); if (TryPSet == CandPSet) { - return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, - Reason); + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand); } int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : @@ -2983,7 +3049,7 @@ // If the candidates are decreasing pressure, reverse priority. if (TryP.getUnitInc() < 0) std::swap(TryRank, CandRank); - return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); + return tryLess(TryRank, CandRank, Cand, TryCand); } unsigned getWeakLeft(const SUnit *SU, bool isTop) { @@ -3034,6 +3100,78 @@ } } // end namespace llvm +void GenericScheduler::initializeHeuristics() { + RegExcess = INIT_HEURISTIC_FULL("REG-EXCESS", this, { + return tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, + Cand, TRI, DAG->MF); + }); + + RegCritMax = INIT_HEURISTIC_FULL("REG-CRIT", this, { + return tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, TRI, DAG->MF); + }); + + RegMax = INIT_HEURISTIC_FULL("REG-MAX", this, { + return tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, + TryCand, Cand, TRI, DAG->MF); + }); +} + +void GenericScheduler::registerHeuristics() { + registerHeuristic(PhysReg); + + if (DAG->isTrackingPressure()) { + registerHeuristic(RegExcess); + registerHeuristic(RegCritMax); + } + + registerHeuristic(AcyLimitedTopDepthReduce); + 
registerHeuristic(AcyLimitedTopPathReduce); + registerHeuristic(AcyLimitedBotHeightReduce); + registerHeuristic(AcyLimitedBotPathReduce); + registerHeuristic(Stall); + registerHeuristic(Cluster); + registerHeuristic(Weak); + + if (DAG->isTrackingPressure()) + registerHeuristic(RegMax); + + registerHeuristic(ResourceReduce); + registerHeuristic(ResourceDemand); + + if (!RegionPolicy.DisableLatencyHeuristic) { + registerHeuristic(TopDepthReduce); + registerHeuristic(TopPathReduce); + registerHeuristic(BotHeightReduce); + registerHeuristic(BotPathReduce); + } + + registerHeuristic(NodeOrder); +} + +#ifndef NDEBUG +void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; + if (Cand.Reason) { + if (Cand.Reason->Name == "REG-EXCESS") + P = Cand.RPDelta.Excess; + else if (Cand.Reason->Name == "REG-CRIT") + P = Cand.RPDelta.CriticalMax; + else if (Cand.Reason->Name == "REG-MAX") + P = Cand.RPDelta.CurrentMax; + } + + if (P.isValid()) { + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " + << (Cand.Reason ? Cand.Reason->Name : "NOCAND"); + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; + dbgs() << '\n'; + } else + GenericSchedulerBase::traceCandidate(Cand); +} +#endif + void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, @@ -3071,119 +3209,6 @@ << Cand.RPDelta.Excess.getUnitInc() << "\n"); } -/// Apply a set of heuristics to a new candidate. Heuristics are currently -/// hierarchical. This may be more efficient than a graduated cost model because -/// we don't need to evaluate all aspects of the model for each node in the -/// queue. But it's really done to make the heuristics easier to debug and -/// statistically analyze. -/// -/// \param Cand provides the policy and current best candidate. -/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 
-/// \param Zone describes the scheduled zone that we are extending, or nullptr -// if Cand is from a different zone than TryCand. -void GenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone) const { - // Initialize the candidate if needed. - if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; - return; - } - - // Bias PhysReg Defs and copies to their uses and defined respectively. - if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), - biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; - - // Avoid exceeding the target's limit. - if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, - Cand.RPDelta.Excess, - TryCand, Cand, RegExcess, TRI, - DAG->MF)) - return; - - // Avoid increasing the max critical pressure in the scheduled region. - if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, - Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical, TRI, - DAG->MF)) - return; - - // We only compare a subset of features when comparing nodes between - // Top and Bottom boundary. Some properties are simply incomparable, in many - // other instances we should only override the other boundary if something - // is a clear good pick on one boundary. Skip heuristics that are more - // "tie-breaking" in nature. - bool SameBoundary = Zone != nullptr; - if (SameBoundary) { - // For loops that are acyclic path limited, aggressively schedule for - // latency. Within an single cycle, whenever CurrMOps > 0, allow normal - // heuristics to take precedence. - if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && - tryLatency(TryCand, Cand, *Zone)) - return; - - // Prioritize instructions that read unbuffered resources by stall cycles. 
- if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), - Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - } - - // Keep clustered nodes together to encourage downstream peephole - // optimizations which may reduce resource requirements. - // - // This is a best effort to set things up for a post-RA pass. Optimizations - // like generating loads of multiple registers should ideally be done within - // the scheduler pass by combining the loads during DAG postprocessing. - const SUnit *CandNextClusterSU = - Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - const SUnit *TryCandNextClusterSU = - TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - if (tryGreater(TryCand.SU == TryCandNextClusterSU, - Cand.SU == CandNextClusterSU, - TryCand, Cand, Cluster)) - return; - - if (SameBoundary) { - // Weak edges are for clustering and other constraints. - if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), - getWeakLeft(Cand.SU, Cand.AtTop), - TryCand, Cand, Weak)) - return; - } - - // Avoid increasing the max pressure of the entire region. - if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, - Cand.RPDelta.CurrentMax, - TryCand, Cand, RegMax, TRI, - DAG->MF)) - return; - - if (SameBoundary) { - // Avoid critical resource consumption and balance the schedule. - TryCand.initResourceDelta(DAG, SchedModel); - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, - TryCand, Cand, ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - // For acyclic path limited loops, latency was already checked above. - if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && - !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; - - // Fall through to original instruction order. 
- if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) - || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { - TryCand.Reason = NodeOrder; - } - } -} - /// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during @@ -3204,7 +3229,7 @@ // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; tryCandidate(Cand, TryCand, ZoneArg); - if (TryCand.Reason != NoCand) { + if (TryCand.Reason) { // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); @@ -3220,12 +3245,12 @@ // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - tracePick(Only1, false); + tracePick(&Only1, false); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - tracePick(Only1, true); + tracePick(&Only1, true); return SU; } // Set the bottom-up policy based on the state of the current bottom zone and @@ -3243,7 +3268,7 @@ BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + assert(BotCand.Reason && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(BotCand)); #ifndef NDEBUG @@ -3263,7 +3288,7 @@ TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + assert(TopCand.Reason && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(TopCand)); #ifndef NDEBUG @@ -3281,9 +3306,9 @@ assert(BotCand.isValid()); assert(TopCand.isValid()); SchedCandidate Cand = BotCand; - TopCand.Reason = NoCand; + 
TopCand.Reason = nullptr; tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { + if (TopCand.Reason) { Cand.setBest(TopCand); LLVM_DEBUG(traceCandidate(Cand)); } @@ -3308,7 +3333,7 @@ CandPolicy NoPolicy; TopCand.reset(NoPolicy); pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find a candidate"); + assert(TopCand.Reason && "failed to find a candidate"); tracePick(TopCand); SU = TopCand.SU; } @@ -3319,7 +3344,7 @@ CandPolicy NoPolicy; BotCand.reset(NoPolicy); pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find a candidate"); + assert(BotCand.Reason && "failed to find a candidate"); tracePick(BotCand); SU = BotCand.SU; } @@ -3427,6 +3452,28 @@ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( Itin, DAG); } + GenericSchedulerBase::initialize(Dag); +} + +void PostGenericScheduler::initializeHeuristics() { + ResReducePostRA = INIT_HEURISTIC("RES-REDUCE", { + if (!Zone) + return nullptr; + return tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand); + }); +} + +void PostGenericScheduler::registerHeuristics() { + registerHeuristic(Stall); + registerHeuristic(Cluster); + registerHeuristic(ResReducePostRA); + registerHeuristic(ResourceDemand); + registerHeuristic(TopDepthReduce); + registerHeuristic(TopPathReduce); + registerHeuristic(BotHeightReduce); + registerHeuristic(BotPathReduce); + registerHeuristic(NodeOrder); } void PostGenericScheduler::registerRoots() { @@ -3443,48 +3490,6 @@ } } -/// Apply a set of heuristics to a new candidate for PostRA scheduling. -/// -/// \param Cand provides the policy and current best candidate. -/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand) { - // Initialize the candidate if needed. 
- if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; - return; - } - - // Prioritize instructions that read unbuffered resources by stall cycles. - if (tryLess(Top.getLatencyStallCycles(TryCand.SU), - Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - - // Keep clustered nodes together. - if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), - Cand.SU == DAG->getNextClusterSucc(), - TryCand, Cand, Cluster)) - return; - - // Avoid critical resource consumption and balance the schedule. - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, - TryCand, Cand, ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; - } - - // Fall through to original instruction order. - if (TryCand.SU->NodeNum < Cand.SU->NodeNum) - TryCand.Reason = NodeOrder; -} - void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { ReadyQueue &Q = Top.Available; for (SUnit *SU : Q) { @@ -3492,8 +3497,8 @@ TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); - tryCandidate(Cand, TryCand); - if (TryCand.Reason != NoCand) { + tryCandidate(Cand, TryCand, &Top); + if (TryCand.Reason) { Cand.setBest(TryCand); LLVM_DEBUG(traceCandidate(Cand)); } @@ -3510,7 +3515,7 @@ do { SU = Top.pickOnlyChoice(); if (SU) { - tracePick(Only1, true); + tracePick(&Only1, true); } else { CandPolicy NoPolicy; SchedCandidate TopCand(NoPolicy); @@ -3518,7 +3523,7 @@ // the instructions outside the zone, including the bottom zone. 
setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); pickNodeFromQueue(TopCand); - assert(TopCand.Reason != NoCand && "failed to find a candidate"); + assert(TopCand.Reason && "failed to find a candidate"); tracePick(TopCand); SU = TopCand.SU; } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -158,7 +158,7 @@ // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); - if (TryCand.Reason != NoCand) { + if (TryCand.Reason) { // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(Zone.DAG, SchedModel); @@ -196,7 +196,7 @@ BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + assert(BotCand.Reason && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(BotCand)); #ifndef NDEBUG @@ -216,7 +216,7 @@ TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + assert(TopCand.Reason && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(TopCand)); #ifndef NDEBUG @@ -234,9 +234,9 @@ LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand); dbgs() << "Bot Cand: "; traceCandidate(BotCand);); SchedCandidate Cand = BotCand; - TopCand.Reason = NoCand; + TopCand.Reason = nullptr; GenericScheduler::tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { + if (TopCand.Reason) { Cand.setBest(TopCand); } LLVM_DEBUG(dbgs() << "Picking: "; 
traceCandidate(Cand);); @@ -261,7 +261,7 @@ CandPolicy NoPolicy; TopCand.reset(NoPolicy); pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find a candidate"); + assert(TopCand.Reason && "failed to find a candidate"); SU = TopCand.SU; } IsTopNode = true; @@ -271,7 +271,7 @@ CandPolicy NoPolicy; BotCand.reset(NoPolicy); pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find a candidate"); + assert(BotCand.Reason && "failed to find a candidate"); SU = BotCand.SU; } IsTopNode = false; diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h @@ -20,31 +20,44 @@ /// A MachineSchedStrategy implementation for PowerPC pre RA scheduling. class PPCPreRASchedStrategy : public GenericScheduler { public: - PPCPreRASchedStrategy(const MachineSchedContext *C) : - GenericScheduler(C) {} + PPCPreRASchedStrategy(const MachineSchedContext *C) : GenericScheduler(C) { + initializeHeuristics(); + } + protected: - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, - SchedBoundary *Zone) const override; + // There are some benefits to schedule the ADDI before the load to hide the + // latency, as RA may create a true dependency between the load and addi. + SchedHeuristic BiasAddiLoad; + + void registerHeuristics() override; + private: - bool biasAddiLoadCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone) const; + void initializeHeuristics(); }; /// A MachineSchedStrategy implementation for PowerPC post RA scheduling. 
class PPCPostRASchedStrategy : public PostGenericScheduler { public: - PPCPostRASchedStrategy(const MachineSchedContext *C) : - PostGenericScheduler(C) {} + PPCPostRASchedStrategy(const MachineSchedContext *C) + : PostGenericScheduler(C) { + initializeHeuristics(); + } protected: + // There are some benefits to schedule the ADDI as early as possible post ra + // to avoid stalled by vector instructions which take up all the hw units. + // And ADDI is usually used to post inc the loop indvar, which matters the + // performance. + SchedHeuristic BiasAddi; + void initialize(ScheduleDAGMI *Dag) override; SUnit *pickNode(bool &IsTopNode) override; void enterMBB(MachineBasicBlock *MBB) override; void leaveMBB() override; + void registerHeuristics() override; - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; - bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const; +private: + void initializeHeuristics(); }; } // end namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -26,99 +26,40 @@ Cand.SU->getInstr()->getOpcode() == PPC::ADDI8; } -static void pickTryCand(GenericScheduler::SchedCandidate &Cand, - GenericScheduler::SchedCandidate &TryCand) { - // Update the reason if we have already selected the TryCand. - if (TryCand.Reason != GenericScheduler::NoCand) { - if (TryCand.Reason > GenericScheduler::Stall) - TryCand.Reason = GenericScheduler::Stall; - } else { - // TryCand is not selected by heuristic and Cand is selected. If it is - // NodeOrder, we can still select the TryCand. 
- if (Cand.Reason == GenericScheduler::NoCand || - Cand.Reason == GenericScheduler::NodeOrder) - TryCand.Reason = GenericScheduler::Stall; - } +void PPCPreRASchedStrategy::initializeHeuristics() { + BiasAddiLoad = INIT_HEURISTIC("ADDI-LOAD", { + if (!Zone) + return nullptr; + SchedCandidate &FirstCand = Zone->isTop() ? TryCand : Cand; + SchedCandidate &SecondCand = Zone->isTop() ? Cand : TryCand; + if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) + return &TryCand; + if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) + return &Cand; + return nullptr; + }); } -static void pickCand(GenericScheduler::SchedCandidate &Cand, - GenericScheduler::SchedCandidate &TryCand) { - if (TryCand.Reason == GenericScheduler::NoCand) { - // TryCand is not selected by heuristic. Update the reason for Cand. - if (Cand.Reason > GenericScheduler::Stall) - Cand.Reason = GenericScheduler::Stall; - } else if (TryCand.Reason == GenericScheduler::NodeOrder) { - // TryCand is selected but it is because of the NodeOrder heuristic. We can - // still select it as Cand. - TryCand.Reason = GenericScheduler::NoCand; - Cand.Reason = GenericScheduler::Stall; - } +void PPCPreRASchedStrategy::registerHeuristics() { + GenericScheduler::registerHeuristics(); + if (!DisableAddiLoadHeuristic) + registerHeuristic(BiasAddiLoad, &NodeOrder); } -bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone) const { - if (DisableAddiLoadHeuristic) - return false; - - SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand; - SchedCandidate &SecondCand = Zone.isTop() ? 
Cand : TryCand; - if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) { - pickTryCand(Cand, TryCand); - return true; - } - if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) { - pickCand(Cand, TryCand); - return true; - } - - return false; -} - -void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone) const { - GenericScheduler::tryCandidate(Cand, TryCand, Zone); - - if (!Cand.isValid() || !Zone) - return; - - // There are some benefits to schedule the ADDI before the load to hide the - // latency, as RA may create a true dependency between the load and addi. - if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) - return; +void PPCPostRASchedStrategy::initializeHeuristics() { + BiasAddi = INIT_HEURISTIC("BIAS-ADDI", { + if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) + return &TryCand; + if (isADDIInstr(Cand) && !isADDIInstr(TryCand)) + return &Cand; + return nullptr; + }); } -bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand) const { - if (!EnableAddiHeuristic) - return false; - - if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) { - pickTryCand(Cand, TryCand); - return true; - } - - if (isADDIInstr(Cand) && !isADDIInstr(TryCand)) { - pickCand(Cand, TryCand); - return true; - } - return false; -} - -void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand) { - PostGenericScheduler::tryCandidate(Cand, TryCand); - - if (!Cand.isValid()) - return; - - // There are some benefits to schedule the ADDI as early as possible post ra - // to avoid stalled by vector instructions which take up all the hw units. - // And ADDI is usually used to post inc the loop indvar, which matters the - // performance. 
- if (biasAddiCandidate(Cand, TryCand)) - return; +void PPCPostRASchedStrategy::registerHeuristics() { + PostGenericScheduler::registerHeuristics(); + if (EnableAddiHeuristic) + registerHeuristic(BiasAddi, &NodeOrder); } void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {