Index: tools/llvm-mca/Backend.h =================================================================== --- tools/llvm-mca/Backend.h +++ tools/llvm-mca/Backend.h @@ -16,9 +16,9 @@ #define LLVM_TOOLS_LLVM_MCA_BACKEND_H #include "DispatchStage.h" +#include "ExecuteStage.h" #include "FetchStage.h" #include "InstrBuilder.h" -#include "Scheduler.h" namespace mca { @@ -55,8 +55,8 @@ /// TODO: Eventually this will become a list of unique Stage* that this /// backend pipeline executes. std::unique_ptr Fetch; - std::unique_ptr HWS; std::unique_ptr Dispatch; + std::unique_ptr Execute; std::set Listeners; unsigned Cycles; @@ -69,13 +69,13 @@ unsigned RegisterFileSize = 0, unsigned LoadQueueSize = 0, unsigned StoreQueueSize = 0, bool AssumeNoAlias = false) : Fetch(std::move(InitialStage)), - HWS(llvm::make_unique(this, Subtarget.getSchedModel(), - LoadQueueSize, StoreQueueSize, - AssumeNoAlias)), Dispatch(llvm::make_unique( - this, Subtarget, MRI, RegisterFileSize, DispatchWidth, HWS.get())), + this, Subtarget, MRI, RegisterFileSize, DispatchWidth)), + Execute(llvm::make_unique(this, Subtarget.getSchedModel(), + LoadQueueSize, StoreQueueSize, + AssumeNoAlias)), Cycles(0) { - HWS->setDispatchStage(Dispatch.get()); + Execute->setDispatchStage(Dispatch.get()); } void run(); Index: tools/llvm-mca/Backend.cpp =================================================================== --- tools/llvm-mca/Backend.cpp +++ tools/llvm-mca/Backend.cpp @@ -39,11 +39,16 @@ InstRef IR; Dispatch->preExecute(IR); - HWS->cycleEvent(); // TODO: This will eventually be stage-ified. + Execute->preExecute(IR); while (Fetch->execute(IR)) { - if (!Dispatch->execute(IR)) + // Note that Execute::canBeDispatched must be attempted if + // Dispatch::canDispatch returns true. Execute::canBeDispatched will + // notify of HW stalls. 
+ if (!Dispatch->canDispatch(IR) || !Execute->canBeDispatched(IR)) break; + Dispatch->execute(IR); + Execute->execute(IR); Fetch->postExecute(IR); } Index: tools/llvm-mca/CMakeLists.txt =================================================================== --- tools/llvm-mca/CMakeLists.txt +++ tools/llvm-mca/CMakeLists.txt @@ -15,6 +15,7 @@ CodeRegion.cpp DispatchStage.cpp DispatchStatistics.cpp + ExecuteStage.cpp FetchStage.cpp HWEventListener.cpp InstrBuilder.cpp @@ -28,7 +29,6 @@ ResourcePressureView.cpp RetireControlUnit.cpp RetireControlUnitStatistics.cpp - Scheduler.cpp SchedulerStatistics.cpp Stage.cpp Support.cpp Index: tools/llvm-mca/DispatchStage.h =================================================================== --- tools/llvm-mca/DispatchStage.h +++ tools/llvm-mca/DispatchStage.h @@ -1,4 +1,4 @@ -//===----------------------- Dispatch.h -------------------------*- C++ -*-===// +//===----------------------- DispatchStage.h --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -26,15 +26,14 @@ namespace mca { class WriteState; -class Scheduler; class Backend; // Implements the hardware dispatch logic. // // This class is responsible for the dispatch stage, in which instructions are -// dispatched in groups to the Scheduler. An instruction can be dispatched if -// functional units are available. -// To be more specific, an instruction can be dispatched to the Scheduler if: +// dispatched in groups to the ExecuteStage. An instruction can be dispatched +// if functional units are available. +// To be more specific, an instruction can be dispatched to the ExecuteStage if: // 1) There are enough entries in the reorder buffer (implemented by class // RetireControlUnit) to accommodate all opcodes. // 2) There are enough temporaries to rename output register operands. 
@@ -56,7 +55,6 @@ unsigned DispatchWidth; unsigned AvailableEntries; unsigned CarryOver; - Scheduler *SC; std::unique_ptr RAT; std::unique_ptr RCU; Backend *Owner; @@ -64,7 +62,6 @@ bool checkRAT(const InstRef &IR); bool checkRCU(const InstRef &IR); - bool checkScheduler(const InstRef &IR); void dispatch(InstRef IR); bool isRCUEmpty() const { return RCU->isEmpty(); } void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); @@ -76,11 +73,6 @@ return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth; } - bool canDispatch(const InstRef &IR) { - assert(isAvailable(IR.getInstruction()->getDesc().NumMicroOps)); - return checkRCU(IR) && checkRAT(IR) && checkScheduler(IR); - } - void collectWrites(llvm::SmallVectorImpl &Vec, unsigned RegID) const { return RAT->collectWrites(Vec, RegID); @@ -89,15 +81,15 @@ public: DispatchStage(Backend *B, const llvm::MCSubtargetInfo &Subtarget, const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize, - unsigned MaxDispatchWidth, Scheduler *Sched) + unsigned MaxDispatchWidth) : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), SC(Sched), - RAT(llvm::make_unique(Subtarget.getSchedModel(), MRI, - RegisterFileSize)), + CarryOver(0U), RAT(llvm::make_unique( + Subtarget.getSchedModel(), MRI, RegisterFileSize)), RCU(llvm::make_unique(Subtarget.getSchedModel(), this)), Owner(B), STI(Subtarget) {} + bool canDispatch(const InstRef &IR); virtual bool isReady() const override final { return isRCUEmpty(); } virtual void preExecute(const InstRef &IR) override final; virtual bool execute(InstRef &IR) override final; Index: tools/llvm-mca/DispatchStage.cpp =================================================================== --- tools/llvm-mca/DispatchStage.cpp +++ tools/llvm-mca/DispatchStage.cpp @@ -1,4 +1,4 @@ -//===--------------------- Dispatch.cpp -------------------------*- C++ -*-===// +//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// // // 
The LLVM Compiler Infrastructure // @@ -15,7 +15,6 @@ #include "DispatchStage.h" #include "Backend.h" #include "HWEventListener.h" -#include "Scheduler.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -65,10 +64,6 @@ return false; } -bool DispatchStage::checkScheduler(const InstRef &IR) { - return SC->canBeDispatched(IR); -} - void DispatchStage::updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI) { SmallVector DependentWrites; @@ -135,11 +130,6 @@ // Notify listeners of the "instruction dispatched" event. notifyInstructionDispatched(IR, RegisterFiles); - - // Now move the instruction into the scheduler's queue. - // The scheduler is responsible for checking if this is a zero-latency - // instruction that doesn't consume pipeline/scheduler resources. - SC->scheduleInstruction(IR); } void DispatchStage::preExecute(const InstRef &IR) { @@ -148,10 +138,14 @@ CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; } -bool DispatchStage::execute(InstRef &IR) { +bool DispatchStage::canDispatch(const InstRef &IR) { const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (!isAvailable(Desc.NumMicroOps) || !canDispatch(IR)) + if (!isAvailable(Desc.NumMicroOps) || !checkRCU(IR) || !checkRAT(IR)) return false; + return true; +} + +bool DispatchStage::execute(InstRef &IR) { dispatch(IR); return true; } Index: tools/llvm-mca/ExecuteStage.h =================================================================== --- tools/llvm-mca/ExecuteStage.h +++ tools/llvm-mca/ExecuteStage.h @@ -1,4 +1,4 @@ -//===--------------------- Scheduler.h ------------------------*- C++ -*-===// +//===------------------------- ExecuteStage.h -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,7 @@ #include "Instruction.h" #include "LSUnit.h" +#include "Stage.h" #include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCSubtargetInfo.h" #include @@ -264,7 +265,7 @@ /// A resource manager for processor resource units and groups.
/// /// This class owns all the ResourceState objects, and it is responsible for -/// acting on requests from a Scheduler by updating the internal state of +/// acting on requests from an ExecuteStage by updating the internal state of /// ResourceState objects. /// This class doesn't know about instruction itineraries and functional units. /// In future, it can be extended to support itineraries too through the same @@ -375,21 +376,21 @@ #endif }; // namespace mca -/// Class Scheduler is responsible for issuing instructions to pipeline +/// Class ExecuteStage is responsible for issuing instructions to pipeline /// resources. Internally, it delegates to a ResourceManager the management of -/// processor resources. -/// This class is also responsible for tracking the progress of instructions -/// from the dispatch stage, until the write-back stage. +/// processor resources. This class is also responsible for tracking the +/// progress of instructions from the dispatch stage, until the write-back +/// stage. /// -/// An nstruction dispatched to the Scheduler is initially placed into either -/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the -/// input operands. Instructions in the WaitQueue are ordered by instruction -/// index. An instruction is moved from the WaitQueue to the ReadyQueue when -/// register operands become available, and all memory dependencies are met. -/// Instructions that are moved from the WaitQueue to the ReadyQueue transition -/// from state 'IS_AVAILABLE' to state 'IS_READY'. +/// An instruction dispatched to the ExecuteStage is initially placed into +/// either the 'WaitQueue' or the 'ReadyQueue' depending on the availability +/// of the input operands. Instructions in the WaitQueue are ordered by +/// instruction index. An instruction is moved from the WaitQueue to the +/// ReadyQueue when register operands become available, and all memory +/// dependencies are met. 
Instructions that are moved from the WaitQueue to +/// the ReadyQueue transition from state 'IS_AVAILABLE' to state 'IS_READY'. /// -/// At the beginning of each cycle, the Scheduler checks if there are +/// At the beginning of each cycle, the ExecuteStage checks if there are /// instructions in the WaitQueue that can be moved to the ReadyQueue. If the /// ReadyQueue is not empty, then older instructions from the queue are issued /// to the processor pipelines, and the underlying ResourceManager is updated @@ -400,7 +401,7 @@ /// issued to a (one or more) pipeline(s). This event also causes an instruction /// state transition (i.e. from state IS_READY, to state IS_EXECUTING). /// An Instruction leaves the IssuedQueue when it reaches the write-back stage. -class Scheduler { +class ExecuteStage : public Stage { const llvm::MCSchedModel &SM; // Hardware resources that are managed by this scheduler. @@ -447,9 +448,10 @@ void updateIssuedQueue(llvm::SmallVectorImpl &Executed); public: - Scheduler(Backend *B, const llvm::MCSchedModel &Model, unsigned LoadQueueSize, - unsigned StoreQueueSize, bool AssumeNoAlias) - : SM(Model), Resources(llvm::make_unique(SM)), + ExecuteStage(Backend *B, const llvm::MCSchedModel &Model, + unsigned LoadQueueSize, unsigned StoreQueueSize, + bool AssumeNoAlias) + : Stage(), SM(Model), Resources(llvm::make_unique(SM)), LSU(llvm::make_unique(LoadQueueSize, StoreQueueSize, AssumeNoAlias)), Owner(B) {} @@ -458,9 +460,9 @@ /// Check if the instruction in 'IR' can be dispatched. /// - /// The DispatchStage is responsible for querying the Scheduler before + /// The DispatchStage is responsible for querying the ExecuteStage before /// dispatching new instructions. Queries are performed through method - /// `Scheduler::canBeDispatched`. If scheduling resources are available, + /// `ExecuteStage::canBeDispatched`. If scheduling resources are available, /// and the instruction can be dispatched, then this method returns true. 
/// Otherwise, a generic HWStallEvent is notified to the listeners. bool canBeDispatched(const InstRef &IR) const; @@ -479,7 +481,8 @@ Resources->releaseBuffers(Buffers); } - void cycleEvent(); + virtual void preExecute(const InstRef &IR) override final; + virtual bool execute(InstRef &IR) override final; #ifndef NDEBUG void dump() const; Index: tools/llvm-mca/ExecuteStage.cpp =================================================================== --- tools/llvm-mca/ExecuteStage.cpp +++ tools/llvm-mca/ExecuteStage.cpp @@ -1,4 +1,4 @@ -//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// +//===------------------------- ExecuteStage.cpp -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "Scheduler.h" +#include "ExecuteStage.h" #include "Backend.h" #include "HWEventListener.h" #include "Support.h" @@ -228,7 +228,7 @@ BusyResources.erase(RF); } -void Scheduler::scheduleInstruction(InstRef &IR) { +bool ExecuteStage::execute(InstRef &IR) { const unsigned Idx = IR.getSourceIndex(); assert(WaitQueue.find(Idx) == WaitQueue.end()); assert(ReadyQueue.find(Idx) == ReadyQueue.end()); @@ -248,7 +248,7 @@ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding " << Idx << " to the Wait Queue\n"); WaitQueue[Idx] = IR.getInstruction(); - return; + return false; } notifyInstructionReady(IR); @@ -269,16 +269,17 @@ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding " << IR << " to the Ready Queue\n"); ReadyQueue[IR.getSourceIndex()] = IR.getInstruction(); - return; + return false; } LLVM_DEBUG(dbgs() << "[SCHEDULER] Instruction " << IR << " issued immediately\n"); // Release buffered resources and issue MCIS to the underlying pipelines. 
issueInstruction(IR); + return true; } -void Scheduler::cycleEvent() { +void ExecuteStage::preExecute(const InstRef &Unused) { SmallVector ResourcesFreed; Resources->cycleEvent(ResourcesFreed); @@ -316,7 +317,7 @@ } #ifndef NDEBUG -void Scheduler::dump() const { +void ExecuteStage::dump() const { dbgs() << "[SCHEDULER]: WaitQueue size is: " << WaitQueue.size() << '\n'; dbgs() << "[SCHEDULER]: ReadyQueue size is: " << ReadyQueue.size() << '\n'; dbgs() << "[SCHEDULER]: IssuedQueue size is: " << IssuedQueue.size() << '\n'; @@ -324,7 +325,7 @@ } #endif -bool Scheduler::canBeDispatched(const InstRef &IR) const { +bool ExecuteStage::canBeDispatched(const InstRef &IR) const { HWStallEvent::GenericEventType Type = HWStallEvent::Invalid; const InstrDesc &Desc = IR.getInstruction()->getDesc(); @@ -348,7 +349,7 @@ return false; } -void Scheduler::issueInstructionImpl( +void ExecuteStage::issueInstructionImpl( InstRef &IR, SmallVectorImpl> &UsedResources) { Instruction *IS = IR.getInstruction(); @@ -366,7 +367,7 @@ IssuedQueue[IR.getSourceIndex()] = IS; } -void Scheduler::issueInstruction(InstRef &IR) { +void ExecuteStage::issueInstruction(InstRef &IR) { // Release buffered resources. const InstrDesc &Desc = IR.getInstruction()->getDesc(); releaseBuffers(Desc.Buffers); @@ -380,7 +381,7 @@ notifyInstructionExecuted(IR); } -void Scheduler::promoteToReadyQueue(SmallVectorImpl &Ready) { +void ExecuteStage::promoteToReadyQueue(SmallVectorImpl &Ready) { // Scan the set of waiting instructions and promote them to the // ready queue if operands are all ready. for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) { @@ -406,7 +407,7 @@ } } -InstRef Scheduler::select() { +InstRef ExecuteStage::select() { // Give priority to older instructions in the ReadyQueue. Since the ready // queue is ordered by key, this will always prioritize older instructions. 
const auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(), @@ -424,7 +425,7 @@ return IR; } -void Scheduler::updatePendingQueue(SmallVectorImpl &Ready) { +void ExecuteStage::updatePendingQueue(SmallVectorImpl &Ready) { // Notify to instructions in the pending queue that a new cycle just // started. for (QueueEntryTy Entry : WaitQueue) @@ -432,7 +433,7 @@ promoteToReadyQueue(Ready); } -void Scheduler::updateIssuedQueue(SmallVectorImpl &Executed) { +void ExecuteStage::updateIssuedQueue(SmallVectorImpl &Executed) { for (auto I = IssuedQueue.begin(), E = IssuedQueue.end(); I != E;) { const QueueEntryTy Entry = *I; Instruction *IS = Entry.second; @@ -450,7 +451,7 @@ } } -void Scheduler::notifyInstructionIssued( +void ExecuteStage::notifyInstructionIssued( const InstRef &IR, ArrayRef> Used) { LLVM_DEBUG({ dbgs() << "[E] Instruction Issued: " << IR << '\n'; @@ -463,7 +464,7 @@ Owner->notifyInstructionEvent(HWInstructionIssuedEvent(IR, Used)); } -void Scheduler::notifyInstructionExecuted(const InstRef &IR) { +void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) { LSU->onInstructionExecuted(IR); LLVM_DEBUG(dbgs() << "[E] Instruction Executed: " << IR << '\n'); Owner->notifyInstructionEvent( @@ -471,17 +472,17 @@ DS->onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); } -void Scheduler::notifyInstructionReady(const InstRef &IR) { +void ExecuteStage::notifyInstructionReady(const InstRef &IR) { LLVM_DEBUG(dbgs() << "[E] Instruction Ready: " << IR << '\n'); Owner->notifyInstructionEvent( HWInstructionEvent(HWInstructionEvent::Ready, IR)); } -void Scheduler::notifyResourceAvailable(const ResourceRef &RR) { +void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) { Owner->notifyResourceAvailable(RR); } -void Scheduler::notifyReservedBuffers(ArrayRef Buffers) { +void ExecuteStage::notifyReservedBuffers(ArrayRef Buffers) { if (Buffers.empty()) return; @@ -492,7 +493,7 @@ Owner->notifyReservedBuffers(BufferIDs); } -void 
Scheduler::notifyReleasedBuffers(ArrayRef Buffers) { +void ExecuteStage::notifyReleasedBuffers(ArrayRef Buffers) { if (Buffers.empty()) return; Index: tools/llvm-mca/HWEventListener.h =================================================================== --- tools/llvm-mca/HWEventListener.h +++ tools/llvm-mca/HWEventListener.h @@ -31,7 +31,7 @@ // and generic Views can manipulate. // Subtargets are free to define additional event types, that are goin to be // handled by generic components as opaque values, but can still be - // emitted by subtarget-specific pipeline components (e.g. Scheduler, + // emitted by subtarget-specific pipeline components (e.g. ExecuteStage, // DispatchStage, ...) and interpreted by subtarget-specific EventListener // implementations. enum GenericEventType { Index: tools/llvm-mca/README.txt =================================================================== --- tools/llvm-mca/README.txt +++ tools/llvm-mca/README.txt @@ -431,39 +431,41 @@ scheduling model; latency values are defined by the scheduling model through ProcWriteResources objects. -Class Scheduler (see file Scheduler.h) knows how to emulate multiple processor -schedulers. A Scheduler is responsible for tracking data dependencies, and -dynamically select which processor resources are consumed/used by instructions. - -Internally, the Scheduler class delegates the management of processor resource -units and resource groups to the ResourceManager class. ResourceManager is also -responsible for selecting resource units that are effectively consumed by -instructions. For example, if an instruction consumes 1cy of a resource group, -the ResourceManager object selects one of the available units from the group; by -default, it uses a round-robin selector to guarantee that resource usage is -uniformly distributed between all units of a group. 
- -Internally, class Scheduler implements three instruction queues: +Class ExecuteStage (see file ExecuteStage.h) knows how to emulate multiple +processor schedulers. The ExecuteStage is responsible for tracking data +dependencies, and dynamically selecting which processor resources are +consumed/used by instructions. + +Internally, the ExecuteStage class delegates the management of processor +resource units and resource groups to the ResourceManager class. +ResourceManager is also responsible for selecting resource units that are +effectively consumed by instructions. For example, if an instruction consumes +1cy of a resource group, the ResourceManager object selects one of the +available units from the group; by default, it uses a round-robin selector to +guarantee that resource usage is uniformly distributed between all units of a +group. + +Internally, class ExecuteStage implements three instruction queues: - WaitQueue: a queue of instructions whose operands are not ready yet. - ReadyQueue: a queue of instructions ready to execute. - IssuedQueue: a queue of instructions executing. Depending on the operands availability, instructions that are dispatched to the -Scheduler are either placed into the WaitQueue or into the ReadyQueue. +ExecuteStage are either placed into the WaitQueue or into the ReadyQueue. -Every cycle, class Scheduler checks if instructions can be moved from the +Every cycle, class ExecuteStage checks if instructions can be moved from the WaitQueue to the ReadyQueue, and if instructions from the ReadyQueue can be issued to the underlying pipelines. The algorithm prioritizes older instructions over younger instructions. -Objects of class ResourceState (see Scheduler.h) describe processor resources. -There is an instance of class ResourceState for each single processor resource -specified by the scheduling model. A ResourceState object for a processor -resource with multiple units dynamically tracks the availability of every single -unit. 
For example, the ResourceState of a resource group tracks the -availability of every resource in that group. Internally, ResourceState -implements a round-robin selector to dynamically pick the next unit to use from -the group. +Objects of class ResourceState (see ExecuteStage.h) describe processor +resources. There is an instance of class ResourceState for each single +processor resource specified by the scheduling model. A ResourceState object +for a processor resource with multiple units dynamically tracks the +availability of every single unit. For example, the ResourceState of a +resource group tracks the availability of every resource in that group. +Internally, ResourceState implements a round-robin selector to dynamically pick +the next unit to use from the group. Write-Back and Retire Stage --------------------------- @@ -610,10 +612,10 @@ accordingly. For such targets, there is no dynamic scheduling done by the hardware. -Existing classes (DispatchStage, Scheduler, etc.) could be extended/adapted to -support processors with a single dispatch/issue stage. The execution flow would -require some changes in the way how existing components (i.e., DispatchStage, -Scheduler, etc.) interact. This can be a future development. +Existing classes (DispatchStage, ExecuteStage, etc.) could be extended/adapted +to support processors with a single dispatch/issue stage. The execution flow +would require some changes in the way how existing components (i.e., +DispatchStage, ExecuteStage, etc.) interact. This can be a future development. The following sections describes other known limitations. The goal is not to provide an extensive list of limitations; we want to report what we believe are @@ -641,7 +643,7 @@ itineraries. This is probably one of the most important limitations, since it affects a few out-of-order processors in LLVM. 
-As mentioned in section 'Instruction Issue', class Scheduler delegates to an +As mentioned in section 'Instruction Issue', class ExecuteStage delegates to an instance of class ResourceManager the handling of processor resources. ResourceManager is where most of the scheduling logic is implemented.