Index: llvm/trunk/include/llvm/CodeGen/MachineScheduler.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineScheduler.h +++ llvm/trunk/include/llvm/CodeGen/MachineScheduler.h @@ -895,6 +895,10 @@ #ifndef NDEBUG void traceCandidate(const SchedCandidate &Cand); #endif + +private: + bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone, + bool ComputeRemLatency, unsigned &RemLatency) const; }; // Utility functions used by heuristics in tryCandidate(). Index: llvm/trunk/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp @@ -2397,6 +2397,52 @@ } } +/// Compute remaining latency. We need this both to determine whether the +/// overall schedule has become latency-limited and whether the instructions +/// outside this zone are resource or latency limited. +/// +/// The "dependent" latency is updated incrementally during scheduling as the +/// max height/depth of scheduled nodes minus the cycles since it was +/// scheduled: +/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone +/// +/// The "independent" latency is the max ready queue depth: +/// ILat = max N.depth for N in Available|Pending +/// +/// RemainingLatency is the greater of independent and dependent latency. +/// +/// These computations are expensive, especially in DAGs with many edges, so +/// only do them if necessary. +static unsigned computeRemLatency(SchedBoundary &CurrZone) { + unsigned RemLatency = CurrZone.getDependentLatency(); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Available.elements())); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Pending.elements())); + return RemLatency; +} + +/// Returns true if the current cycle plus remaining latency is greater than +/// the critical path in the scheduling region. 
+bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy, + SchedBoundary &CurrZone, + bool ComputeRemLatency, + unsigned &RemLatency) const { + // The current cycle is already greater than the critical path, so we are + // already latency limited and don't need to compute the remaining latency. + if (CurrZone.getCurrCycle() > Rem.CriticalPath) + return true; + + // If we haven't scheduled anything yet, then we aren't latency limited. + if (CurrZone.getCurrCycle() == 0) + return false; + + if (ComputeRemLatency) + RemLatency = computeRemLatency(CurrZone); + + return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath; +} + /// Set the CandPolicy given a scheduling zone given the current resources and /// latencies inside and outside the zone. void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, @@ -2406,46 +2452,32 @@ // inside and outside this zone. Potential stalls should be considered before // following this policy. - // Compute remaining latency. We need this both to determine whether the - // overall schedule has become latency-limited and whether the instructions - // outside this zone are resource or latency limited. - // - // The "dependent" latency is updated incrementally during scheduling as the - // max height/depth of scheduled nodes minus the cycles since it was - // scheduled: - // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone - // - // The "independent" latency is the max ready queue depth: - // ILat = max N.depth for N in Available|Pending - // - // RemainingLatency is the greater of independent and dependent latency. - unsigned RemLatency = CurrZone.getDependentLatency(); - RemLatency = std::max(RemLatency, - CurrZone.findMaxLatency(CurrZone.Available.elements())); - RemLatency = std::max(RemLatency, - CurrZone.findMaxLatency(CurrZone.Pending.elements())); - // Compute the critical resource outside the zone. unsigned OtherCritIdx = 0; unsigned OtherCount = OtherZone ? 
OtherZone->getOtherResourceCount(OtherCritIdx) : 0; bool OtherResLimited = false; - if (SchedModel->hasInstrSchedModel()) + unsigned RemLatency = 0; + bool RemLatencyComputed = false; + if (SchedModel->hasInstrSchedModel() && OtherCount != 0) { + RemLatency = computeRemLatency(CurrZone); + RemLatencyComputed = true; OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), OtherCount, RemLatency); + } // Schedule aggressively for latency in PostRA mode. We don't check for // acyclic latency during PostRA, and highly out-of-order processors will // skip PostRA scheduling. - if (!OtherResLimited) { - if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { - Policy.ReduceLatency |= true; - LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName() - << " RemainingLatency " << RemLatency << " + " - << CurrZone.getCurrCycle() << "c > CritPath " - << Rem.CriticalPath << "\n"); - } + if (!OtherResLimited && + (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed, + RemLatency))) { + Policy.ReduceLatency |= true; + LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName() + << " RemainingLatency " << RemLatency << " + " + << CurrZone.getCurrCycle() << "c > CritPath " + << Rem.CriticalPath << "\n"); } // If the same resource is limiting inside and outside the zone, do nothing. if (CurrZone.getZoneCritResIdx() == OtherCritIdx)