diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -674,6 +674,10 @@ // it. SmallVector ReservedCyclesIndex; + // For each PIdx, stores the resource group ID of which it is a member + SmallVector ResourceGroupIDs; + bool HasUnbufferedResourceGroup; + #ifndef NDEBUG // Remember the greatest possible stall as an upper bound on the number of // times we should retry the pending queue because of a hazard. @@ -751,8 +755,20 @@ unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles); - std::pair getNextResourceCycle(unsigned PIdx, - unsigned Cycles); + std::pair + getNextResourceCycle(unsigned PIdx, unsigned Cycles, unsigned Uses); + + typedef SmallVector, 8> NextResourceCycles; + NextResourceCycles getNextResourceCycleList(unsigned PIdx, unsigned Cycles, + unsigned Uses); + + struct AdjustedResEntry { + uint16_t ProcResourceIdx; + uint16_t Cycles; + uint16_t Uses; + }; + void adjustProcResources(const MCSchedClassDesc *SC, + SmallVector &PR); bool checkHazard(SUnit *SU); @@ -775,7 +791,8 @@ void incExecutedResources(unsigned PIdx, unsigned Count); - unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); + unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned Uses, + unsigned ReadyCycle); void bumpNode(SUnit *SU); diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2004,6 +2004,7 @@ IsResourceLimited = false; ReservedCycles.clear(); ReservedCyclesIndex.clear(); + ResourceGroupIDs.clear(); #ifndef NDEBUG // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is @@ -2045,11 +2046,22 @@ unsigned ResourceCount = 
SchedModel->getNumProcResourceKinds(); ReservedCyclesIndex.resize(ResourceCount); ExecutedResCounts.resize(ResourceCount); + ResourceGroupIDs.resize(ResourceCount, 0); + HasUnbufferedResourceGroup = false; unsigned NumUnits = 0; for (unsigned i = 0; i < ResourceCount; ++i) { ReservedCyclesIndex[i] = NumUnits; NumUnits += SchedModel->getProcResource(i)->NumUnits; + if (SchedModel->getProcResource(i)->SubUnitsIdxBegin) { + HasUnbufferedResourceGroup |= + !(SchedModel->getProcResource(i)->BufferSize); + for (unsigned u = 0, ue = SchedModel->getProcResource(i)->NumUnits; + u != ue; ++u) { + ResourceGroupIDs[SchedModel->getProcResource(i) + ->SubUnitsIdxBegin[u]] = i; + } + } } ReservedCycles.resize(NumUnits, InvalidCycle); @@ -2091,7 +2103,8 @@ /// scheduled. Returns the next cycle and the index of the processor resource /// instance in the reserved cycles vector. std::pair -SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { +SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles, + unsigned Uses) { unsigned MinNextUnreserved = InvalidCycle; unsigned InstanceIdx = 0; unsigned StartIndex = ReservedCyclesIndex[PIdx]; @@ -2099,15 +2112,109 @@ assert(NumberOfInstances > 0 && "Cannot have zero instances of a ProcResource"); - for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; - ++I) { - unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); - if (MinNextUnreserved > NextUnreserved) { - InstanceIdx = I; - MinNextUnreserved = NextUnreserved; + if (Uses == 1) { + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; + ++I) { + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + if (MinNextUnreserved > NextUnreserved) { + InstanceIdx = I; + MinNextUnreserved = NextUnreserved; + } + } + return std::make_pair(MinNextUnreserved, InstanceIdx); + } else { + SmallVector, 8> NextResCycles; + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; + ++I) 
{ + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + NextResCycles.emplace_back(NextUnreserved, I); + } + // Find the earliest (Uses) resource instances + std::nth_element(NextResCycles.begin(), NextResCycles.begin() + (Uses - 1), + NextResCycles.end()); + return NextResCycles[Uses - 1]; + } +} + +/// Compute the next cycle at which the given processor resource can be +/// scheduled. Returns the next cycle and the indices of the processor resource +/// instances in the reserved cycles vector. +SchedBoundary::NextResourceCycles +SchedBoundary::getNextResourceCycleList(unsigned PIdx, unsigned Cycles, + unsigned Uses) { + unsigned MinNextUnreserved = InvalidCycle; + unsigned InstanceIdx = 0; + unsigned StartIndex = ReservedCyclesIndex[PIdx]; + unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; + assert(NumberOfInstances > 0 && + "Cannot have zero instances of a ProcResource"); + + SmallVector, 8> NextResCycles; + if (Uses == 1) { + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; + ++I) { + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + if (MinNextUnreserved > NextUnreserved) { + InstanceIdx = I; + MinNextUnreserved = NextUnreserved; + } + } + NextResCycles.emplace_back(MinNextUnreserved, InstanceIdx); + } else { + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; + ++I) { + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + NextResCycles.emplace_back(NextUnreserved, I); + } + // Find the earliest (Uses) resource instances + std::nth_element(NextResCycles.begin(), NextResCycles.begin() + (Uses - 1), + NextResCycles.end()); + NextResCycles.resize(Uses); + } + + return NextResCycles; +} + +/// Adjust the processor write resources to use for unbuffered ProcResGroups. 
+/// The problem that must be solved is that the WriteProcRes table reports +/// the "total" cycles*instances claim upon a group, from direct uses of it +/// as well as uses of its members. When BufferSize = 0, individual claims +/// for instances of the group are needed. This is accomplished by generating +/// a new write resource vector (in a passed-in temporary vector) which has +/// individual claims on the group from each member of it as well as a claim +/// upon the entire group which is reduced by those individual claims. +void SchedBoundary::adjustProcResources(const MCSchedClassDesc *SC, + SmallVector &PR) { + PR.clear(); + if (HasUnbufferedResourceGroup) { + SmallVector GroupAdjustments; + GroupAdjustments.resize(SchedModel->getNumProcResourceKinds(), 0); + bool HasResGroup = false; + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); + PI != PE; ++PI) { + unsigned ResGrpID = ResourceGroupIDs[PI->ProcResourceIdx]; + if (ResGrpID && !SchedModel->getProcResource(ResGrpID)->BufferSize) { + GroupAdjustments[ResGrpID]++; + HasResGroup = true; + } + } + if (HasResGroup) { + PR.clear(); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); + PI != PE; ++PI) + if (GroupAdjustments[PI->ProcResourceIdx]) + PR.emplace_back(AdjustedResEntry{ + PI->ProcResourceIdx, + uint16_t(PI->Cycles / GroupAdjustments[PI->ProcResourceIdx]), + GroupAdjustments[PI->ProcResourceIdx]}); + else + PR.emplace_back(AdjustedResEntry{PI->ProcResourceIdx, PI->Cycles, 1}); } } - return std::make_pair(MinNextUnreserved, InstanceIdx); } /// Does this SU have a hazard within the current instruction group.
@@ -2146,24 +2253,47 @@ if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - for (const MCWriteProcResEntry &PE : - make_range(SchedModel->getWriteProcResBegin(SC), - SchedModel->getWriteProcResEnd(SC))) { - unsigned ResIdx = PE.ProcResourceIdx; - unsigned Cycles = PE.Cycles; - unsigned NRCycle, InstanceIdx; - std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles); - if (NRCycle > CurrCycle) { + SmallVector ResTmp; + adjustProcResources(SC, ResTmp); + if (!ResTmp.size()) + for (const MCWriteProcResEntry &PE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + unsigned ResIdx = PE.ProcResourceIdx; + unsigned Cycles = PE.Cycles; + unsigned NRCycle, InstanceIdx; + std::tie(NRCycle, InstanceIdx) = + getNextResourceCycle(ResIdx, Cycles, 1); + if (NRCycle > CurrCycle) { #ifndef NDEBUG - MaxObservedStall = std::max(Cycles, MaxObservedStall); + MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif - LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " - << SchedModel->getResourceName(ResIdx) - << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' - << "=" << NRCycle << "c\n"); - return true; + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(ResIdx) << '[' + << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' + << "=" << NRCycle << "c\n"); + return true; + } + } + else + for (auto &PE : ResTmp) { + unsigned ResIdx = PE.ProcResourceIdx; + unsigned Cycles = PE.Cycles; + unsigned Uses = PE.Uses; + unsigned NRCycle, InstanceIdx; + std::tie(NRCycle, InstanceIdx) = + getNextResourceCycle(ResIdx, Cycles, Uses); + if (NRCycle > CurrCycle) { +#ifndef NDEBUG + MaxObservedStall = std::max(Cycles, MaxObservedStall); +#endif + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(ResIdx) << '[' + << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' + << "=" << NRCycle << "c\n"); + return 
true; + } } - } } return false; } @@ -2302,8 +2432,8 @@ /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. -unsigned SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { +unsigned SchedBoundary::countResource(unsigned PIdx, unsigned Cycles, + unsigned Uses, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" @@ -2325,7 +2455,8 @@ } // For reserved resources, record the highest cycle using the resource. unsigned NextAvailable, InstanceIdx; - std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); + std::tie(NextAvailable, InstanceIdx) = + getNextResourceCycle(PIdx, Cycles, Uses); if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " << SchedModel->getResourceName(PIdx) @@ -2401,33 +2532,63 @@ << "c\n"); } } - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); - if (RCycle > NextCycle) - NextCycle = RCycle; - } + SmallVector ResTmp; + adjustProcResources(SC, ResTmp); + if (!ResTmp.size()) + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); + PI != PE; ++PI) { + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, 1, NextCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; + } + else + for (auto &PI : ResTmp) { + unsigned RCycle = + countResource(PI.ProcResourceIdx, PI.Cycles, PI.Uses, NextCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; + } if (SU->hasReservedResource) { // For reserved resources, record the highest cycle using the resource. 
// For top-down scheduling, this is the cycle in which we schedule this // instruction plus the number of cycles the operations reserves the // resource. For bottom-up is it simply the instruction's cycle. - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned PIdx = PI->ProcResourceIdx; - if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { - unsigned ReservedUntil, InstanceIdx; - std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); - if (isTop()) { - ReservedCycles[InstanceIdx] = - std::max(ReservedUntil, NextCycle + PI->Cycles); - } else - ReservedCycles[InstanceIdx] = NextCycle; + if (!ResTmp.size()) + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); + PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + unsigned ReservedUntil, InstanceIdx; + std::tie(ReservedUntil, InstanceIdx) = + getNextResourceCycle(PIdx, 0, 1); + if (isTop()) { + ReservedCycles[InstanceIdx] = + std::max(ReservedUntil, NextCycle + PI->Cycles); + } else + ReservedCycles[InstanceIdx] = NextCycle; + } + } + else + for (auto &PI : ResTmp) { + unsigned PIdx = PI.ProcResourceIdx; + if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + auto NRC = getNextResourceCycleList(PIdx, 0, PI.Uses); + for (auto &R : NRC) { + unsigned ReservedUntil = R.first; + unsigned InstanceIdx = R.second; + if (isTop()) { + ReservedCycles[InstanceIdx] = + std::max(ReservedUntil, NextCycle + PI.Cycles); + } else + ReservedCycles[InstanceIdx] = NextCycle; + } + } } - } } } // Update ExpectedLatency and DependentLatency.