diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1012,7 +1012,7 @@ const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const; SUnit *pickNodeBidirectional(bool &IsTopNode); @@ -1075,7 +1075,7 @@ } protected: - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); + virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); void pickNodeFromQueue(SchedCandidate &Cand); }; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3177,33 +3177,33 @@ /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. /// \param Zone describes the scheduled zone that we are extending, or nullptr // if Cand is from a different zone than TryCand. -void GenericScheduler::tryCandidate(SchedCandidate &Cand, +bool GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Bias PhysReg Defs and copies to their uses and defined respectively. if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; + return true; // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess, TRI, DAG->MF)) - return; + return true; // Avoid increasing the max critical pressure in the scheduled region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; + return true; // We only compare a subset of features when comparing nodes between // Top and Bottom boundary. Some properties are simply incomparable, in many @@ -3217,12 +3217,12 @@ // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) - return; + return true; // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return true; } // Keep clustered nodes together to encourage downstream peephole @@ -3238,14 +3238,14 @@ if (tryGreater(TryCand.SU == TryCandNextClusterSU, Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; + return true; if (SameBoundary) { // Weak edges are for clustering and other constraints. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; + return true; } // Avoid increasing the max pressure of the entire region. @@ -3253,24 +3253,24 @@ Cand.RPDelta.CurrentMax, TryCand, Cand, RegMax, TRI, DAG->MF)) - return; + return true; if (SameBoundary) { // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return true; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return true; // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; + return true; // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) @@ -3278,6 +3278,8 @@ TryCand.Reason = NodeOrder; } } + + return false; } /// Pick the best candidate from the queue. @@ -3543,42 +3545,44 @@ /// /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, +bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Top.getLatencyStallCycles(TryCand.SU), Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return true; // Keep clustered nodes together. if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; + return true; // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return true; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return true; // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; + return true; } // Fall through to original instruction order. if (TryCand.SU->NodeNum < Cand.SU->NodeNum) TryCand.Reason = NodeOrder; + + return false; } void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h @@ -23,8 +23,9 @@ PPCPreRASchedStrategy(const MachineSchedContext *C) : GenericScheduler(C) {} protected: - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override; + private: bool biasAddiLoadCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, @@ -43,7 +44,7 @@ void enterMBB(MachineBasicBlock *MBB) override; void leaveMBB() override; - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const; }; diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -46,118 +46,21 @@ return false; } -void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, +bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { - // From GenericScheduler::tryCandidate - - // Initialize the candidate if needed. - if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; - return; - } - - // Bias PhysReg Defs and copies to their uses and defined respectively. - if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), - biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; - - // Avoid exceeding the target's limit. - if (DAG->isTrackingPressure() && - tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, - RegExcess, TRI, DAG->MF)) - return; - - // Avoid increasing the max critical pressure in the scheduled region. - if (DAG->isTrackingPressure() && - tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; - - // We only compare a subset of features when comparing nodes between - // Top and Bottom boundary. Some properties are simply incomparable, in many - // other instances we should only override the other boundary if something - // is a clear good pick on one boundary. Skip heuristics that are more - // "tie-breaking" in nature. - bool SameBoundary = Zone != nullptr; - if (SameBoundary) { - // For loops that are acyclic path limited, aggressively schedule for - // latency. Within an single cycle, whenever CurrMOps > 0, allow normal - // heuristics to take precedence. - if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && - tryLatency(TryCand, Cand, *Zone)) - return; - - // Prioritize instructions that read unbuffered resources by stall cycles. - if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), - Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - } - - // Keep clustered nodes together to encourage downstream peephole - // optimizations which may reduce resource requirements. - // - // This is a best effort to set things up for a post-RA pass. Optimizations - // like generating loads of multiple registers should ideally be done within - // the scheduler pass by combining the loads during DAG postprocessing. - const SUnit *CandNextClusterSU = - Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - const SUnit *TryCandNextClusterSU = - TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - if (tryGreater(TryCand.SU == TryCandNextClusterSU, - Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; - - if (SameBoundary) { - // Weak edges are for clustering and other constraints. - if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), - getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; - } - - // Avoid increasing the max pressure of the entire region. - if (DAG->isTrackingPressure() && - tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, - Cand, RegMax, TRI, DAG->MF)) - return; - - if (SameBoundary) { - // Avoid critical resource consumption and balance the schedule. - TryCand.initResourceDelta(DAG, SchedModel); - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, TryCand, Cand, - ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - // For acyclic path limited loops, latency was already checked above. - if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && - !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; - - // Fall through to original instruction order. - if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || - (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { - TryCand.Reason = NodeOrder; - } - } - - // GenericScheduler::tryCandidate end - // Add powerpc specific heuristic only when TryCand isn't selected or // selected as node order. - if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; + if (GenericScheduler::tryCandidate(Cand, TryCand, Zone) || + (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)) + return true; // There are some benefits to schedule the ADDI before the load to hide the // latency, as RA may create a true dependency between the load and addi. - if (SameBoundary) { - if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) - return; - } + if (Zone != nullptr && biasAddiLoadCandidate(Cand, TryCand, *Zone)) + return true; + + return false; } bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, @@ -172,57 +75,22 @@ return false; } -void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, +bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { - // From PostGenericScheduler::tryCandidate - - // Initialize the candidate if needed. - if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; - return; - } - - // Prioritize instructions that read unbuffered resources by stall cycles. - if (tryLess(Top.getLatencyStallCycles(TryCand.SU), - Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - - // Keep clustered nodes together. - if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), - Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; - - // Avoid critical resource consumption and balance the schedule. - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, TryCand, Cand, - ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; - } - - // Fall through to original instruction order. - if (TryCand.SU->NodeNum < Cand.SU->NodeNum) - TryCand.Reason = NodeOrder; - - // PostGenericScheduler::tryCandidate end - // Add powerpc post ra specific heuristic only when TryCand isn't selected or // selected as node order. - if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; + if (PostGenericScheduler::tryCandidate(Cand, TryCand) || + (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)) + return true; // There are some benefits to schedule the ADDI as early as possible post ra // to avoid stalled by vector instructions which take up all the hw units. // And ADDI is usually used to post inc the loop indvar, which matters the // performance. if (biasAddiCandidate(Cand, TryCand)) - return; + return true; + + return false; } void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {