// Patch overview (free text ahead of the first `diff --git` header).
//
// NOTE(review): this copy of the patch looks extraction-damaged: the original
// newlines are collapsed into a few long lines and several template argument
// lists are missing (`std::pair getNextResourceCycle`, `SmallVector, 8>`,
// `std::vector &Cycles`, `list ProcResources`). Regenerate it from the
// original change before trying to apply it.
//
// What the hunks do:
//   * MCSchedule.h: MCWriteProcResEntry gains a `uint16_t Uses` field, and
//     operator== now compares it as well.
//   * TargetSchedule.td: ProcWriteResources gains `ResourceUses` (default
//     `[]`). The added comment describes it as the number of copies of each
//     resource consumed, paired by position with ProcResources.
//   * MachineScheduler.h/.cpp: SchedBoundary::getNextResourceCycle takes an
//     extra `Uses` argument and returns a NextResourceCycles vector whose
//     entries are built with emplace_back(NextUnreserved, I). Callers read
//     `.back().first` as the cycle and `.back().second` as the instance index.
//     countResource takes an extra `Uses` argument. The unbuffered-resource
//     path in the last MachineScheduler.cpp hunk updates ReservedCycles for
//     every returned instance.
//   * MachineScheduler.cpp, MachineTraceMetrics.cpp, MCSchedule.cpp: resource
//     accounting that used `Cycles` now uses `Cycles * Uses`.
//   * MachinePipeliner.cpp: resource counters grow by `Uses` instead of 1, and
//     the availability test becomes `Count + Uses > NumUnits`.
//   * SubtargetEmitter.cpp: reads `ResourceUses`, defaults each entry to 1
//     when the list is empty, reports a fatal error on a size mismatch or a
//     negative value, carries Uses through ExpandProcResources, keeps the max
//     Uses when merging entries for the same resource, and emits Uses as the
//     third column of the WriteProcResTable.
//
// NOTE(review): points to confirm before landing.
//   * getNextResourceCycle, `Uses == 1` branch: the `for (unsigned U ...)`
//     loop can only run once, so it looks redundant.
//   * getNextResourceCycle, else branch: `NextResCycles` is declared but not
//     used in the lines shown.
//   * getNextResourceCycle with `Uses == 0`: `Uses - 1` wraps (unsigned) and
//     `resize(Uses)` leaves the vector empty, yet callers call `.back()`.
//     The emitter only rejects `Uses < 0`, and the "Invalid" table row is
//     `{ 0, 0, 0}`. Verify that zero can never reach this function.
//   * getNextResourceCycle with `Uses > NumUnits`: the nth_element position
//     would lie past `end()` and `resize(Uses)` would append value-initialized
//     entries. No check for this is visible here.
//   * countResource: the context line still computes `Count = Factor * Cycles`
//     while RemainingCounts is accumulated with `* PI->Uses`. Confirm whether
//     the two are meant to match.
//   * MCSchedule.cpp: the divisor is `I->Cycles * I->Uses`, but only
//     `!I->Cycles` is guarded against zero.
//   * SubtargetEmitter.cpp: `WPREntry.Uses = Uses[PRIdx]` stores into a
//     uint16_t before the `< 0` check, and no upper-bound check is visible.
//   * SubtargetEmitter.cpp: the mismatch message names ResourceUses first but
//     prints PRVec.size() first.
//
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -751,8 +751,10 @@ unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles); - std::pair getNextResourceCycle(unsigned PIdx, - unsigned Cycles); + typedef SmallVector, 8> NextResourceCycles; + + NextResourceCycles getNextResourceCycle(unsigned PIdx, unsigned Cycles, + unsigned Uses); bool checkHazard(SUnit *SU); @@ -775,7 +777,8 @@ void incExecutedResources(unsigned PIdx, unsigned Count); - unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); + unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned Uses, + unsigned ReadyCycle); void bumpNode(SUnit *SU); diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -64,9 +64,11 @@ struct MCWriteProcResEntry { uint16_t ProcResourceIdx; uint16_t Cycles; + uint16_t Uses; bool operator==(const MCWriteProcResEntry &Other) const { - return ProcResourceIdx == Other.ProcResourceIdx && Cycles == Other.Cycles; + return ProcResourceIdx == Other.ProcResourceIdx && Cycles == Other.Cycles && + Uses == Other.Uses; } }; diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td --- a/llvm/include/llvm/Target/TargetSchedule.td +++ b/llvm/include/llvm/Target/TargetSchedule.td @@ -252,6 +252,7 @@ class ProcWriteResources resources> { list ProcResources = resources; list ResourceCycles = []; + list ResourceUses = []; int Latency = 1; int NumMicroOps = 1; bit BeginGroup = false; @@ -284,7 +285,13 @@ // cycle, regardless of latency, which models a fully pipelined processing // unit. A value of 0 for ResourceCycles means that the resource must // be available but is not consumed, which is only relevant for -// unbuffered resources. 
+// unbuffered resources. Optionally, ResourceUses indicates the number of +// copies of the resource which are consumed. Each ResourceUses item is +// paired with the ProcResource item at the same position in its list. +// ResourceUses can be `[]`: in that case, a single resource is consumed. +// ResourceUses and ResourceCycles can be used together: in that case, the +// number of copies indicated by ResourceUses are consumed for the number of +// cycles indicated by ResourceCycles. // // By default, each SchedWrite takes one micro-op, which is counted // against the processor's IssueWidth limit. If an instruction can diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1067,7 +1067,7 @@ STI->getWriteProcResEnd(SCDesc))) { if (!PRE.Cycles) continue; - Resources[PRE.ProcResourceIdx]++; + Resources[PRE.ProcResourceIdx] += PRE.Uses; } return; } @@ -3066,7 +3066,7 @@ ProcResourceCount[I->ProcResourceIdx], NumUnits, I->Cycles); }); - if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits) + if (ProcResourceCount[I->ProcResourceIdx] + I->Uses > NumUnits) return false; } LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";); @@ -3095,7 +3095,7 @@ STI->getWriteProcResEnd(SCDesc))) { if (!PRE.Cycles) continue; - ++ProcResourceCount[PRE.ProcResourceIdx]; + ProcResourceCount[PRE.ProcResourceIdx] += PRE.Uses; LLVM_DEBUG({ if (SwpDebugResource) { const MCProcResourceDesc *ProcResource = diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2032,7 +2032,7 @@ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; unsigned Factor = SchedModel->getResourceFactor(PIdx); - RemainingCounts[PIdx] += (Factor * PI->Cycles); + RemainingCounts[PIdx] += (Factor * PI->Cycles * PI->Uses); 
} } } @@ -2089,27 +2089,44 @@ return NextUnreserved; } -/// Compute the next cycle at which the given processor resource can be -/// scheduled. Returns the next cycle and the index of the processor resource -/// instance in the reserved cycles vector. -std::pair -SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { - unsigned MinNextUnreserved = InvalidCycle; - unsigned InstanceIdx = 0; +/// Compute the next cycle at which the given processor resources can be +/// scheduled. Returns the next cycle and the indices of the processor resource +/// instances in the reserved cycles vector. +SchedBoundary::NextResourceCycles +SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles, + unsigned Uses) { + NextResourceCycles InstanceIdx; unsigned StartIndex = ReservedCyclesIndex[PIdx]; unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; assert(NumberOfInstances > 0 && "Cannot have zero instances of a ProcResource"); - for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; - ++I) { - unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); - if (MinNextUnreserved > NextUnreserved) { - InstanceIdx = I; - MinNextUnreserved = NextUnreserved; + if (Uses == 1) { + InstanceIdx.emplace_back(InvalidCycle, 0); + for (unsigned U = 0; U < Uses; ++U) { + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; + I < End; ++I) { + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + if (InstanceIdx.back().first > NextUnreserved) { + InstanceIdx.back().second = I; + InstanceIdx.back().first = NextUnreserved; + } + } } + } else { + SmallVector, 8> NextResCycles; + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; + ++I) { + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + InstanceIdx.emplace_back(NextUnreserved, I); + } + // Find the earliest (Uses) resource instances + std::nth_element(InstanceIdx.begin(), InstanceIdx.begin() + 
(Uses - 1), + InstanceIdx.end()); + InstanceIdx.resize(Uses); + // Leaves the element with the largest NextResourceCycle as the last one } - return std::make_pair(MinNextUnreserved, InstanceIdx); + return InstanceIdx; } /// Does this SU have a hazard within the current instruction group. @@ -2153,8 +2170,10 @@ SchedModel->getWriteProcResEnd(SC))) { unsigned ResIdx = PE.ProcResourceIdx; unsigned Cycles = PE.Cycles; - unsigned NRCycle, InstanceIdx; - std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles); + unsigned Uses = PE.Uses; + NextResourceCycles NRCycles = getNextResourceCycle(ResIdx, Cycles, Uses); + unsigned NRCycle = NRCycles.back().first; + unsigned InstanceIdx = NRCycles.back().second; if (NRCycle > CurrCycle) { #ifndef NDEBUG MaxObservedStall = std::max(Cycles, MaxObservedStall); @@ -2304,8 +2323,8 @@ /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. -unsigned SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { +unsigned SchedBoundary::countResource(unsigned PIdx, unsigned Cycles, + unsigned Uses, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" @@ -2326,8 +2345,9 @@ << "c\n"); } // For reserved resources, record the highest cycle using the resource. 
- unsigned NextAvailable, InstanceIdx; - std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); + NextResourceCycles NRCycles = getNextResourceCycle(PIdx, Cycles, Uses); + unsigned NextAvailable = NRCycles.back().first; + unsigned InstanceIdx = NRCycles.back().second; if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " << SchedModel->getResourceName(PIdx) @@ -2407,7 +2427,7 @@ PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); + countResource(PI->ProcResourceIdx, PI->Cycles, PI->Uses, NextCycle); if (RCycle > NextCycle) NextCycle = RCycle; } @@ -2421,13 +2441,14 @@ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { - unsigned ReservedUntil, InstanceIdx; - std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); - if (isTop()) { - ReservedCycles[InstanceIdx] = - std::max(ReservedUntil, NextCycle + PI->Cycles); - } else - ReservedCycles[InstanceIdx] = NextCycle; + NextResourceCycles NRCycles = getNextResourceCycle(PIdx, 0, PI->Uses); + unsigned ReservedUntil = NRCycles.back().first; + for (auto &NRC : NRCycles) + if (isTop()) { + ReservedCycles[NRC.second] = + std::max(ReservedUntil, NextCycle + PI->Cycles); + } else + ReservedCycles[NRC.second] = NextCycle; } } } @@ -2592,9 +2613,9 @@ PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { if (PI->ProcResourceIdx == Policy.ReduceResIdx) - ResDelta.CritResources += PI->Cycles; + ResDelta.CritResources += PI->Cycles * PI->Uses; if (PI->ProcResourceIdx == Policy.DemandResIdx) - ResDelta.DemandedResources += PI->Cycles; + ResDelta.DemandedResources += PI->Cycles * PI->Uses; } } diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp --- 
a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -127,7 +127,7 @@ PI = SchedModel.getWriteProcResBegin(SC), PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); - PRCycles[PI->ProcResourceIdx] += PI->Cycles; + PRCycles[PI->ProcResourceIdx] += PI->Cycles * PI->Uses; } } FBI->InstrCount = InstrCount; @@ -1244,8 +1244,8 @@ PI != PE; ++PI) { if (PI->ProcResourceIdx != ResourceIdx) continue; - Cycles += - (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); + Cycles += (PI->Cycles * PI->Uses * + TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); } } return Cycles; diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp --- a/llvm/lib/MC/MCSchedule.cpp +++ b/llvm/lib/MC/MCSchedule.cpp @@ -95,7 +95,7 @@ if (!I->Cycles) continue; unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits; - double Temp = NumUnits * 1.0 / I->Cycles; + double Temp = NumUnits * 1.0 / (I->Cycles * I->Uses); Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp; } if (Throughput.hasValue()) diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -109,6 +109,7 @@ Record *FindReadAdvance(const CodeGenSchedRW &SchedRead, const CodeGenProcModel &ProcModel); void ExpandProcResources(RecVec &PRVec, std::vector &Cycles, + std::vector &Uses, const CodeGenProcModel &ProcModel); void GenSchedClassTables(const CodeGenProcModel &ProcModel, SchedClassTables &SchedTables); @@ -937,8 +938,10 @@ // resource groups and super resources that cover them. 
void SubtargetEmitter::ExpandProcResources(RecVec &PRVec, std::vector &Cycles, + std::vector &Uses, const CodeGenProcModel &PM) { assert(PRVec.size() == Cycles.size() && "failed precondition"); + assert(PRVec.size() == Uses.size() && "failed precondition"); for (unsigned i = 0, e = PRVec.size(); i != e; ++i) { Record *PRDef = PRVec[i]; RecVec SubResources; @@ -959,6 +962,7 @@ SubDef->getLoc()); PRVec.push_back(SuperDef); Cycles.push_back(Cycles[i]); + Uses.push_back(Uses[i]); SubDef = SuperDef; } } @@ -975,6 +979,7 @@ if (SubI == SubE) { PRVec.push_back(PR); Cycles.push_back(Cycles[i]); + Uses.push_back(Uses[i]); } } } @@ -1106,6 +1111,8 @@ RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources"); std::vector Cycles = WriteRes->getValueAsListOfInts("ResourceCycles"); + std::vector Uses = + WriteRes->getValueAsListOfInts("ResourceUses"); if (Cycles.empty()) { // If ResourceCycles is not provided, default to one cycle per @@ -1121,8 +1128,22 @@ .concat(" vs ") .concat(Twine(Cycles.size()))); } + if (Uses.empty()) { + // If ResourceUses is not provided, default to one use of each + // resource. + Uses.resize(PRVec.size(), 1); + } else if (Uses.size() != PRVec.size()) { + // If ResourceUses is provided, check consistency. 
+ PrintFatalError( + WriteRes->getLoc(), + Twine("Inconsistent resource uses: !size(ResourceUses) != " + "!size(ProcResources): ") + .concat(Twine(PRVec.size())) + .concat(" vs ") + .concat(Twine(Uses.size()))); + } - ExpandProcResources(PRVec, Cycles, ProcModel); + ExpandProcResources(PRVec, Cycles, Uses, ProcModel); for (unsigned PRIdx = 0, PREnd = PRVec.size(); PRIdx != PREnd; ++PRIdx) { @@ -1130,15 +1151,24 @@ WPREntry.ProcResourceIdx = ProcModel.getProcResourceIdx(PRVec[PRIdx]); assert(WPREntry.ProcResourceIdx && "Bad ProcResourceIdx"); WPREntry.Cycles = Cycles[PRIdx]; + WPREntry.Uses = Uses[PRIdx]; + if (Uses[PRIdx] < 0) + PrintFatalError(WriteRes->getLoc(), "Resource use is less than 0"); + // If this resource is already used in this sequence, add the current // entry's cycles so that the same resource appears to be used // serially, rather than multiple parallel uses. This is important for // in-order machine where the resource consumption is a hazard. + // + // If the resource is to be used in parallel, use the largest + // such number of parallel uses. unsigned WPRIdx = 0, WPREnd = WriteProcResources.size(); for( ; WPRIdx != WPREnd; ++WPRIdx) { if (WriteProcResources[WPRIdx].ProcResourceIdx == WPREntry.ProcResourceIdx) { WriteProcResources[WPRIdx].Cycles += WPREntry.Cycles; + WriteProcResources[WPRIdx].Uses = + std::max(WPREntry.Uses, WriteProcResources[WPRIdx].Uses); break; } } @@ -1242,15 +1272,16 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS) { // Emit global WriteProcResTable. 
- OS << "\n// {ProcResourceIdx, Cycles}\n" - << "extern const llvm::MCWriteProcResEntry " - << Target << "WriteProcResTable[] = {\n" - << " { 0, 0}, // Invalid\n"; + OS << "\n// {ProcResourceIdx, Cycles, Uses}\n" + << "extern const llvm::MCWriteProcResEntry " << Target + << "WriteProcResTable[] = {\n" + << " { 0, 0, 0}, // Invalid\n"; for (unsigned WPRIdx = 1, WPREnd = SchedTables.WriteProcResources.size(); WPRIdx != WPREnd; ++WPRIdx) { MCWriteProcResEntry &WPREntry = SchedTables.WriteProcResources[WPRIdx]; OS << " {" << format("%2d", WPREntry.ProcResourceIdx) << ", " - << format("%2d", WPREntry.Cycles) << "}"; + << format("%2d", WPREntry.Cycles) << ", " << format("%2d", WPREntry.Uses) + << "}"; if (WPRIdx + 1 < WPREnd) OS << ','; OS << " // #" << WPRIdx << '\n';