Index: llvm/include/llvm/CodeGen/MachineScheduler.h =================================================================== --- llvm/include/llvm/CodeGen/MachineScheduler.h +++ llvm/include/llvm/CodeGen/MachineScheduler.h @@ -612,31 +612,10 @@ void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; -/// Each Scheduling boundary is associated with ready queues. It tracks the -/// current cycle in the direction of movement, and maintains the state -/// of "hazards" and other interlocks at the current cycle. -class SchedBoundary { -public: - /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) - enum { - TopQID = 1, - BotQID = 2, - LogMaxQID = 2 - }; - - ScheduleDAGMI *DAG = nullptr; - const TargetSchedModel *SchedModel = nullptr; - SchedRemainder *Rem = nullptr; - - ReadyQueue Available; - ReadyQueue Pending; - - ScheduleHazardRecognizer *HazardRec = nullptr; - -private: - /// True if the pending Q should be checked/updated before scheduling another - /// instruction. - bool CheckPending; +/// Each SchedState is associated with SchedBoundary. It tracks the resource +/// state in the direction of movement. +struct SchedState { + SchedState(); /// Number of cycles it takes to issue the instructions scheduled in this /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. @@ -686,6 +665,36 @@ // times we should retry the pending queue because of a hazard. unsigned MaxObservedStall; #endif +}; + +/// Each Scheduling boundary is associated with ready queues. It tracks the +/// current cycle in the direction of movement, and maintains the state +/// of "hazards" and other interlocks at the current cycle. 
+class SchedBoundary { +public: + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { + TopQID = 1, + BotQID = 2, + LogMaxQID = 2 + }; + + ScheduleDAGMI *DAG = nullptr; + const TargetSchedModel *SchedModel = nullptr; + SchedRemainder *Rem = nullptr; + + ReadyQueue Available; + ReadyQueue Pending; + + ScheduleHazardRecognizer *HazardRec = nullptr; + +private: + /// True if the pending Q should be checked/updated before scheduling another + /// instruction. + bool CheckPending; + + /// Scheduling resource state. + SchedState State; public: /// Pending queues extend the ready queues with the same ID and the @@ -699,6 +708,9 @@ void reset(); + const SchedState &getState() const { return State; } + void setState(const SchedState &State) { this->State = State; } + void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem); @@ -707,19 +719,19 @@ } /// Number of cycles to issue the instructions scheduled in this zone. - unsigned getCurrCycle() const { return CurrCycle; } + unsigned getCurrCycle() const { return State.CurrCycle; } /// Micro-ops issued in the current cycle - unsigned getCurrMOps() const { return CurrMOps; } + unsigned getCurrMOps() const { return State.CurrMOps; } // The latency of dependence chains leading into this zone. - unsigned getDependentLatency() const { return DependentLatency; } + unsigned getDependentLatency() const { return State.DependentLatency; } /// Get the number of latency cycles "covered" by the scheduled /// instructions. This is the larger of the critical path within the zone /// and the number of cycles required to issue the instructions. 
unsigned getScheduledLatency() const { - return std::max(ExpectedLatency, CurrCycle); + return std::max(State.ExpectedLatency, State.CurrCycle); } unsigned getUnscheduledLatency(SUnit *SU) const { @@ -727,29 +739,29 @@ } unsigned getResourceCount(unsigned ResIdx) const { - return ExecutedResCounts[ResIdx]; + return State.ExecutedResCounts[ResIdx]; } /// Get the scaled count of scheduled micro-ops and resources, including /// executed resources. unsigned getCriticalCount() const { - if (!ZoneCritResIdx) - return RetiredMOps * SchedModel->getMicroOpFactor(); - return getResourceCount(ZoneCritResIdx); + if (!State.ZoneCritResIdx) + return State.RetiredMOps * SchedModel->getMicroOpFactor(); + return getResourceCount(State.ZoneCritResIdx); } /// Get a scaled count for the minimum execution time of the scheduled /// micro-ops that are ready to execute by getExecutedCount. Notice the /// feedback loop. unsigned getExecutedCount() const { - return std::max(CurrCycle * SchedModel->getLatencyFactor(), - MaxExecutedResCount); + return std::max(State.CurrCycle * SchedModel->getLatencyFactor(), + State.MaxExecutedResCount); } - unsigned getZoneCritResIdx() const { return ZoneCritResIdx; } + unsigned getZoneCritResIdx() const { return State.ZoneCritResIdx; } // Is the scheduled region resource limited vs. latency limited. - bool isResourceLimited() const { return IsResourceLimited; } + bool isResourceLimited() const { return State.IsResourceLimited; } /// Get the difference between the given SUnit's ready time and the current /// cycle. Index: llvm/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/lib/CodeGen/MachineScheduler.cpp +++ llvm/lib/CodeGen/MachineScheduler.cpp @@ -1850,6 +1850,23 @@ // and possibly other custom schedulers. 
//===----------------------------------------------------------------------===// +SchedState::SchedState() : + CurrCycle(0), CurrMOps(0), + MinReadyCycle(std::numeric_limits<unsigned>::max()), ExpectedLatency(0), + DependentLatency(0), RetiredMOps(0), MaxExecutedResCount(0), + ZoneCritResIdx(0), IsResourceLimited(false) +#ifndef NDEBUG + // Track the maximum number of stall cycles that could arise either from the + // latency of a DAG edge or the number of cycles that a processor resource is + // reserved (SchedState::ReservedCycles). + , MaxObservedStall(0) +#endif + { + // Reserve a zero-count for invalid CritResIdx. + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); +} + static const unsigned InvalidCycle = ~0U; SchedBoundary::~SchedBoundary() { delete HazardRec; } @@ -1872,25 +1889,8 @@ Available.clear(); Pending.clear(); CheckPending = false; - CurrCycle = 0; - CurrMOps = 0; - MinReadyCycle = std::numeric_limits<unsigned>::max(); - ExpectedLatency = 0; - DependentLatency = 0; - RetiredMOps = 0; - MaxExecutedResCount = 0; - ZoneCritResIdx = 0; - IsResourceLimited = false; - ReservedCycles.clear(); -#ifndef NDEBUG - // Track the maximum number of stall cycles that could arise either from the - // latency of a DAG edge or the number of cycles that a processor resource is - // reserved (SchedBoundary::ReservedCycles). - MaxObservedStall = 0; -#endif - // Reserve a zero-count for invalid CritResIdx. 
- ExecutedResCounts.resize(1); - assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); + + State = SchedState(); } void SchedRemainder:: @@ -1920,8 +1920,9 @@ SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) { - ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); - ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); + State.ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); + State.ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), + InvalidCycle); } } @@ -1937,8 +1938,8 @@ return 0; unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); - if (ReadyCycle > CurrCycle) - return ReadyCycle - CurrCycle; + if (ReadyCycle > State.CurrCycle) + return ReadyCycle - State.CurrCycle; return 0; } @@ -1946,7 +1947,7 @@ /// scheduled. unsigned SchedBoundary:: getNextResourceCycle(unsigned PIdx, unsigned Cycles) { - unsigned NextUnreserved = ReservedCycles[PIdx]; + unsigned NextUnreserved = State.ReservedCycles[PIdx]; // If this resource has never been used, always return cycle zero. 
if (NextUnreserved == InvalidCycle) return 0; @@ -1976,13 +1977,14 @@ } unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { + if ((State.CurrMOps > 0) && (State.CurrMOps + uops > + SchedModel->getIssueWidth())) { LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; } - if (CurrMOps > 0 && + if (State.CurrMOps > 0 && ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) || (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) { LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must " @@ -1998,9 +2000,9 @@ unsigned ResIdx = PE.ProcResourceIdx; unsigned Cycles = PE.Cycles; unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles); - if (NRCycle > CurrCycle) { + if (NRCycle > State.CurrCycle) { #ifndef NDEBUG - MaxObservedStall = std::max(Cycles, MaxObservedStall); + State.MaxObservedStall = std::max(Cycles, State.MaxObservedStall); #endif LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " << SchedModel->getResourceName(ResIdx) << "=" @@ -2041,7 +2043,7 @@ return 0; unsigned OtherCritCount = Rem->RemIssueCount - + (RetiredMOps * SchedModel->getMicroOpFactor()); + + (State.RetiredMOps * SchedModel->getMicroOpFactor()); LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); @@ -2068,17 +2070,18 @@ // ReadyCycle was been bumped up to the CurrCycle when this node was // scheduled, but CurrCycle may have been eagerly advanced immediately after // scheduling, so may now be greater than ReadyCycle. 
- if (ReadyCycle > CurrCycle) - MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); + if (ReadyCycle > State.CurrCycle) + State.MaxObservedStall = std::max(ReadyCycle - State.CurrCycle, + State.MaxObservedStall); #endif - if (ReadyCycle < MinReadyCycle) - MinReadyCycle = ReadyCycle; + if (ReadyCycle < State.MinReadyCycle) + State.MinReadyCycle = ReadyCycle; // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; - if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) || + if ((!IsBuffered && ReadyCycle > State.CurrCycle) || checkHazard(SU) || Available.size() >= ReadyListLimit) Pending.push(SU); else @@ -2088,27 +2091,28 @@ /// Move the boundary of scheduled code by one cycle. void SchedBoundary::bumpCycle(unsigned NextCycle) { if (SchedModel->getMicroOpBufferSize() == 0) { - assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && + assert(State.MinReadyCycle < std::numeric_limits<unsigned>::max() && "MinReadyCycle uninitialized"); - if (MinReadyCycle > NextCycle) - NextCycle = MinReadyCycle; + if (State.MinReadyCycle > NextCycle) + NextCycle = State.MinReadyCycle; } // Update the current micro-ops, which will issue in the next cycle. - unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); - CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - + State.CurrCycle); + State.CurrMOps = (State.CurrMOps <= DecMOps) ? 0 : State.CurrMOps - DecMOps; // Decrement DependentLatency based on the next cycle. 
- if ((NextCycle - CurrCycle) > DependentLatency) - DependentLatency = 0; + if ((NextCycle - State.CurrCycle) > State.DependentLatency) + State.DependentLatency = 0; else - DependentLatency -= (NextCycle - CurrCycle); + State.DependentLatency -= (NextCycle - State.CurrCycle); if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. - CurrCycle = NextCycle; + State.CurrCycle = NextCycle; } else { // Bypass getHazardType calls in case of long latency. - for (; CurrCycle != NextCycle; ++CurrCycle) { + for (; State.CurrCycle != NextCycle; ++State.CurrCycle) { if (isTop()) HazardRec->AdvanceCycle(); else @@ -2116,18 +2120,18 @@ } } CheckPending = true; - IsResourceLimited = + State.IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), getScheduledLatency()); - LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() - << '\n'); + LLVM_DEBUG(dbgs() << "Cycle: " << State.CurrCycle << ' ' << + Available.getName() << '\n'); } void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { - ExecutedResCounts[PIdx] += Count; - if (ExecutedResCounts[PIdx] > MaxExecutedResCount) - MaxExecutedResCount = ExecutedResCounts[PIdx]; + State.ExecutedResCounts[PIdx] += Count; + if (State.ExecutedResCounts[PIdx] > State.MaxExecutedResCount) + State.MaxExecutedResCount = State.ExecutedResCounts[PIdx]; } /// Add the given processor resource to this scheduled zone. @@ -2151,8 +2155,9 @@ // Check if this resource exceeds the current critical resource. If so, it // becomes the critical resource. 
- if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { - ZoneCritResIdx = PIdx; + if (State.ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > + getCriticalCount())) { + State.ZoneCritResIdx = PIdx; LLVM_DEBUG(dbgs() << " *** Critical resource " << SchedModel->getResourceName(PIdx) << ": " << getResourceCount(PIdx) / SchedModel->getLatencyFactor() @@ -2160,7 +2165,7 @@ } // For reserved resources, record the highest cycle using the resource. unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); - if (NextAvailable > CurrCycle) { + if (NextAvailable > State.CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " << SchedModel->getProcResource(PIdx)->Name << " reserved until @" << NextAvailable << "\n"); @@ -2184,16 +2189,17 @@ const MCSchedClassDesc *SC = DAG->getSchedClass(SU); unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); assert( - (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && + (State.CurrMOps == 0 || (State.CurrMOps + IncMOps) <= + SchedModel->getIssueWidth()) && "Cannot schedule this instruction's MicroOps in the current cycle."); unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); - unsigned NextCycle = CurrCycle; + unsigned NextCycle = State.CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { case 0: - assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + assert(ReadyCycle <= State.CurrCycle && "Broken PendingQueue"); break; case 1: if (ReadyCycle > NextCycle) { @@ -2210,23 +2216,23 @@ NextCycle = ReadyCycle; break; } - RetiredMOps += IncMOps; + State.RetiredMOps += IncMOps; // Update resource counts and critical resource. 
if (SchedModel->hasInstrSchedModel()) { unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); Rem->RemIssueCount -= DecRemIssue; - if (ZoneCritResIdx) { + if (State.ZoneCritResIdx) { // Scale scheduled micro-ops for comparing with the critical resource. unsigned ScaledMOps = - RetiredMOps * SchedModel->getMicroOpFactor(); + State.RetiredMOps * SchedModel->getMicroOpFactor(); // If scaled micro-ops are now more than the previous critical resource by // a full cycle, then micro-ops issue becomes critical. - if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + if ((int)(ScaledMOps - getResourceCount(State.ZoneCritResIdx)) >= (int)SchedModel->getLatencyFactor()) { - ZoneCritResIdx = 0; + State.ZoneCritResIdx = 0; LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: " << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); @@ -2251,18 +2257,20 @@ unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { if (isTop()) { - ReservedCycles[PIdx] = + State.ReservedCycles[PIdx] = std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); } else - ReservedCycles[PIdx] = NextCycle; + State.ReservedCycles[PIdx] = NextCycle; } } } } // Update ExpectedLatency and DependentLatency. - unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; - unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + unsigned &TopLatency = isTop() ? State.ExpectedLatency : + State.DependentLatency; + unsigned &BotLatency = isTop() ? State.DependentLatency : + State.ExpectedLatency; if (SU->getDepth() > TopLatency) { TopLatency = SU->getDepth(); LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU(" @@ -2274,12 +2282,12 @@ << SU->NodeNum << ") " << BotLatency << "c\n"); } // If we stall for any reason, bump the cycle. 
- if (NextCycle > CurrCycle) + if (NextCycle > State.CurrCycle) bumpCycle(NextCycle); else // After updating ZoneCritResIdx and ExpectedLatency, check if we're // resource limited. If a stall occurred, bumpCycle does this. - IsResourceLimited = + State.IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), getScheduledLatency()); @@ -2287,7 +2295,7 @@ // resets CurrMOps. Loop to handle instructions with more MOps than issue in // one cycle. Since we commonly reach the max MOps here, opportunistically // bump the cycle to avoid uselessly checking everything in the readyQ. - CurrMOps += IncMOps; + State.CurrMOps += IncMOps; // Bump the cycle count for issue group constraints. // This must be done after NextCycle has been adjust for all other stalls. @@ -2300,9 +2308,9 @@ bumpCycle(++NextCycle); } - while (CurrMOps >= SchedModel->getIssueWidth()) { - LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle " - << CurrCycle << '\n'); + while (State.CurrMOps >= SchedModel->getIssueWidth()) { + LLVM_DEBUG(dbgs() << " *** Max MOps " << State.CurrMOps << " at cycle " + << State.CurrCycle << '\n'); bumpCycle(++NextCycle); } LLVM_DEBUG(dumpScheduledState()); @@ -2313,7 +2321,7 @@ void SchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) - MinReadyCycle = std::numeric_limits<unsigned>::max(); + State.MinReadyCycle = std::numeric_limits<unsigned>::max(); // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. @@ -2322,10 +2330,10 @@ SUnit *SU = *(Pending.begin()+i); unsigned ReadyCycle = isTop() ? 
SU->TopReadyCycle : SU->BotReadyCycle; - if (ReadyCycle < MinReadyCycle) - MinReadyCycle = ReadyCycle; + if (ReadyCycle < State.MinReadyCycle) + State.MinReadyCycle = ReadyCycle; - if (!IsBuffered && ReadyCycle > CurrCycle) + if (!IsBuffered && ReadyCycle > State.CurrCycle) continue; if (checkHazard(SU)) @@ -2358,7 +2366,7 @@ if (CheckPending) releasePending(); - if (CurrMOps > 0) { + if (State.CurrMOps > 0) { // Defer any ready instrs that now have a hazard. for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { if (checkHazard(*I)) { @@ -2374,7 +2382,7 @@ // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && // "permanent hazard"); (void)i; - bumpCycle(CurrCycle + 1); + bumpCycle(State.CurrCycle + 1); releasePending(); } @@ -2392,22 +2400,22 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { unsigned ResFactor; unsigned ResCount; - if (ZoneCritResIdx) { - ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); - ResCount = getResourceCount(ZoneCritResIdx); + if (State.ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(State.ZoneCritResIdx); + ResCount = getResourceCount(State.ZoneCritResIdx); } else { ResFactor = SchedModel->getMicroOpFactor(); - ResCount = RetiredMOps * ResFactor; + ResCount = State.RetiredMOps * ResFactor; } unsigned LFactor = SchedModel->getLatencyFactor(); - dbgs() << Available.getName() << " @" << CurrCycle << "c\n" - << " Retired: " << RetiredMOps; + dbgs() << Available.getName() << " @" << State.CurrCycle << "c\n" + << " Retired: " << State.RetiredMOps; dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; dbgs() << "\n Critical: " << ResCount / LFactor << "c, " << ResCount / ResFactor << " " - << SchedModel->getResourceName(ZoneCritResIdx) - << "\n ExpectedLatency: " << ExpectedLatency << "c\n" - << (IsResourceLimited ? 
" - Resource" : " - Latency") + << SchedModel->getResourceName(State.ZoneCritResIdx) + << "\n ExpectedLatency: " << State.ExpectedLatency << "c\n" + << (State.IsResourceLimited ? " - Resource" : " - Latency") << " limited.\n"; } #endif