diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1012,7 +1012,7 @@ const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const; SUnit *pickNodeBidirectional(bool &IsTopNode); @@ -1075,7 +1075,7 @@ } protected: - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); + virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); void pickNodeFromQueue(SchedCandidate &Cand); }; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2818,6 +2818,8 @@ namespace llvm { /// Return true if this heuristic determines order. +/// TODO: Consider refactor return type of these functions as integer or enum, +/// as we may need to differentiate whether TryCand is better than Cand. bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, @@ -3176,34 +3178,35 @@ /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. /// \param Zone describes the scheduled zone that we are extending, or nullptr -// if Cand is from a different zone than TryCand. -void GenericScheduler::tryCandidate(SchedCandidate &Cand, +/// if Cand is from a different zone than TryCand. +/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) +bool GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Bias PhysReg Defs and copies to their uses and defined respectively. if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; + return TryCand.Reason != NoCand; // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // Avoid increasing the max critical pressure in the scheduled region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // We only compare a subset of features when comparing nodes between // Top and Bottom boundary. Some properties are simply incomparable, in many @@ -3217,12 +3220,12 @@ // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; } // Keep clustered nodes together to encourage downstream peephole @@ -3238,14 +3241,14 @@ if (tryGreater(TryCand.SU == TryCandNextClusterSU, Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Weak edges are for clustering and other constraints. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; + return TryCand.Reason != NoCand; } // Avoid increasing the max pressure of the entire region. @@ -3253,31 +3256,34 @@ Cand.RPDelta.CurrentMax, TryCand, Cand, RegMax, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { TryCand.Reason = NodeOrder; + return true; } } + + return false; } /// Pick the best candidate from the queue. @@ -3299,8 +3305,7 @@ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; - tryCandidate(Cand, TryCand, ZoneArg); - if (TryCand.Reason != NoCand) { + if (tryCandidate(Cand, TryCand, ZoneArg)) { // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); @@ -3378,8 +3383,7 @@ assert(TopCand.isValid()); SchedCandidate Cand = BotCand; TopCand.Reason = NoCand; - tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { + if (tryCandidate(Cand, TopCand, nullptr)) { Cand.setBest(TopCand); LLVM_DEBUG(traceCandidate(Cand)); } @@ -3543,42 +3547,47 @@ /// /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, +/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) +bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Top.getLatencyStallCycles(TryCand.SU), Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; // Keep clustered nodes together. if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; + return TryCand.Reason != NoCand; } // Fall through to original instruction order. - if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { TryCand.Reason = NodeOrder; + return true; + } + + return false; } void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { @@ -3588,8 +3597,7 @@ TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); - tryCandidate(Cand, TryCand); - if (TryCand.Reason != NoCand) { + if (tryCandidate(Cand, TryCand)) { Cand.setBest(TryCand); LLVM_DEBUG(traceCandidate(Cand)); } diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h @@ -23,8 +23,9 @@ PPCPreRASchedStrategy(const MachineSchedContext *C) : GenericScheduler(C) {} protected: - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override; + private: bool biasAddiLoadCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, @@ -43,7 +44,7 @@ void enterMBB(MachineBasicBlock *MBB) override; void leaveMBB() override; - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const; }; diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -46,7 +46,7 @@ return false; } -void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, +bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { // From GenericScheduler::tryCandidate @@ -54,25 +54,25 @@ // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Bias PhysReg Defs and copies to their uses and defined respectively. if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; + return TryCand.Reason != NoCand; // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // Avoid increasing the max critical pressure in the scheduled region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // We only compare a subset of features when comparing nodes between // Top and Bottom boundary. Some properties are simply incomparable, in many @@ -86,12 +86,12 @@ // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; } // Keep clustered nodes together to encourage downstream peephole @@ -106,37 +106,37 @@ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); if (tryGreater(TryCand.SU == TryCandNextClusterSU, Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Weak edges are for clustering and other constraints. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; + return TryCand.Reason != NoCand; } // Avoid increasing the max pressure of the entire region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, Cand, RegMax, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || @@ -150,14 +150,16 @@ // Add powerpc specific heuristic only when TryCand isn't selected or // selected as node order. if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; + return true; // There are some benefits to schedule the ADDI before the load to hide the // latency, as RA may create a true dependency between the load and addi. if (SameBoundary) { if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) - return; + return TryCand.Reason != NoCand; } + + return TryCand.Reason != NoCand; } bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, @@ -172,38 +174,38 @@ return false; } -void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, +bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { // From PostGenericScheduler::tryCandidate // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Top.getLatencyStallCycles(TryCand.SU), Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; // Keep clustered nodes together. if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; + return TryCand.Reason != NoCand; } // Fall through to original instruction order. @@ -215,14 +217,16 @@ // Add powerpc post ra specific heuristic only when TryCand isn't selected or // selected as node order. if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; + return true; // There are some benefits to schedule the ADDI as early as possible post ra // to avoid stalled by vector instructions which take up all the hw units. // And ADDI is usually used to post inc the loop indvar, which matters the // performance. if (biasAddiCandidate(Cand, TryCand)) - return; + return TryCand.Reason != NoCand; + + return TryCand.Reason != NoCand; } void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {