Index: llvm/trunk/tools/llvm-mca/Backend.h =================================================================== --- llvm/trunk/tools/llvm-mca/Backend.h +++ llvm/trunk/tools/llvm-mca/Backend.h @@ -24,6 +24,7 @@ class HWEventListener; class HWInstructionEvent; +class HWStallEvent; /// \brief An out of order backend for a specific subtarget. /// @@ -97,18 +98,10 @@ return HWS->getBuffersUsage(Usage); } - unsigned getNumRATStalls() const { return DU->getNumRATStalls(); } - unsigned getNumRCUStalls() const { return DU->getNumRCUStalls(); } - unsigned getNumSQStalls() const { return DU->getNumSQStalls(); } - unsigned getNumLDQStalls() const { return DU->getNumLDQStalls(); } - unsigned getNumSTQStalls() const { return DU->getNumSTQStalls(); } - unsigned getNumDispatchGroupStalls() const { - return DU->getNumDispatchGroupStalls(); - } - void addEventListener(HWEventListener *Listener); void notifyCycleBegin(unsigned Cycle); void notifyInstructionEvent(const HWInstructionEvent &Event); + void notifyStallEvent(const HWStallEvent &Event); void notifyResourceAvailable(const ResourceRef &RR); void notifyCycleEnd(unsigned Cycle); }; Index: llvm/trunk/tools/llvm-mca/Backend.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Backend.cpp +++ llvm/trunk/tools/llvm-mca/Backend.cpp @@ -36,7 +36,8 @@ std::unique_ptr NewIS( IB->createInstruction(STI, IR.first, *IR.second)); const InstrDesc &Desc = NewIS->getDesc(); - if (!DU->isAvailable(Desc.NumMicroOps) || !DU->canDispatch(*NewIS)) + if (!DU->isAvailable(Desc.NumMicroOps) || + !DU->canDispatch(IR.first, *NewIS)) break; Instruction *IS = NewIS.get(); @@ -62,6 +63,11 @@ Listener->onInstructionEvent(Event); } +void Backend::notifyStallEvent(const HWStallEvent &Event) { + for (HWEventListener *Listener : Listeners) + Listener->onStallEvent(Event); +} + void Backend::notifyResourceAvailable(const ResourceRef &RR) { DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' << RR.second << "]\n"); Index: llvm/trunk/tools/llvm-mca/BackendStatistics.h =================================================================== --- llvm/trunk/tools/llvm-mca/BackendStatistics.h +++ llvm/trunk/tools/llvm-mca/BackendStatistics.h @@ -59,6 +59,7 @@ #include "Backend.h" #include "View.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" #include @@ -80,6 +81,10 @@ unsigned NumRetired; unsigned NumCycles; + // Counts dispatch stall events caused by unavailability of resources. There + // is one counter for every generic stall kind (see class HWStallEvent). + llvm::SmallVector HWStalls; + void updateHistograms() { DispatchGroupSizePerCycle[NumDispatched]++; IssuedPerCycle[NumIssued]++; @@ -93,10 +98,7 @@ void printDispatchUnitStatistics(llvm::raw_ostream &OS) const; void printSchedulerStatistics(llvm::raw_ostream &OS) const; - void printDispatchStalls(llvm::raw_ostream &OS, unsigned RATStalls, - unsigned RCUStalls, unsigned SQStalls, - unsigned LDQStalls, unsigned STQStalls, - unsigned DGStalls) const; + void printDispatchStalls(llvm::raw_ostream &OS) const; void printRATStatistics(llvm::raw_ostream &OS, unsigned Mappings, unsigned MaxUsedMappings) const; void printRCUStatistics(llvm::raw_ostream &OS, const Histogram &Histogram, @@ -111,7 +113,7 @@ public: BackendStatistics(const Backend &backend, const llvm::MCSubtargetInfo &sti) : B(backend), STI(sti), NumDispatched(0), NumIssued(0), NumRetired(0), - NumCycles(0) {} + NumCycles(0), HWStalls(HWStallEvent::LastGenericEvent) {} void onInstructionEvent(const HWInstructionEvent &Event) override; @@ -119,10 +121,13 @@ void onCycleEnd(unsigned Cycle) override { updateHistograms(); } + void onStallEvent(const HWStallEvent &Event) override { + if (Event.Type < HWStallEvent::LastGenericEvent) + HWStalls[Event.Type]++; + } + void printView(llvm::raw_ostream &OS) const override { - printDispatchStalls(OS, B.getNumRATStalls(), B.getNumRCUStalls(), - B.getNumSQStalls(), B.getNumLDQStalls(), - B.getNumSTQStalls(), B.getNumDispatchGroupStalls()); + printDispatchStalls(OS); printRATStatistics(OS, B.getTotalRegisterMappingsCreated(), B.getMaxUsedRegisterMappings()); printDispatchUnitStatistics(OS); Index: llvm/trunk/tools/llvm-mca/BackendStatistics.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/BackendStatistics.cpp +++ llvm/trunk/tools/llvm-mca/BackendStatistics.cpp @@ -105,27 +105,22 @@ OS << Buffer; } -void BackendStatistics::printDispatchStalls(raw_ostream &OS, unsigned RATStalls, - unsigned RCUStalls, - unsigned SCHEDQStalls, - unsigned LDQStalls, - unsigned STQStalls, - unsigned DGStalls) const { +void BackendStatistics::printDispatchStalls(raw_ostream &OS) const { std::string Buffer; raw_string_ostream TempStream(Buffer); TempStream << "\n\nDynamic Dispatch Stall Cycles:\n"; TempStream << "RAT - Register unavailable: " - << RATStalls; + << HWStalls[HWStallEvent::RegisterFileStall]; TempStream << "\nRCU - Retire tokens unavailable: " - << RCUStalls; + << HWStalls[HWStallEvent::RetireControlUnitStall]; TempStream << "\nSCHEDQ - Scheduler full: " - << SCHEDQStalls; + << HWStalls[HWStallEvent::SchedulerQueueFull]; TempStream << "\nLQ - Load queue full: " - << LDQStalls; + << HWStalls[HWStallEvent::LoadQueueFull]; TempStream << "\nSQ - Store queue full: " - << STQStalls; + << HWStalls[HWStallEvent::StoreQueueFull]; TempStream << "\nGROUP - Static restrictions on the dispatch group: " - << DGStalls; + << HWStalls[HWStallEvent::DispatchGroupStall]; TempStream << '\n'; TempStream.flush(); OS << Buffer; Index: llvm/trunk/tools/llvm-mca/Dispatch.h =================================================================== --- llvm/trunk/tools/llvm-mca/Dispatch.h +++ llvm/trunk/tools/llvm-mca/Dispatch.h @@ -255,41 +255,9 @@ std::unique_ptr RCU; Backend *Owner; - /// Dispatch stall event identifiers. - /// - /// The naming convention is: - /// * Event names starts with the "DS_" prefix - /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the - /// the unavailable resource/functional unit acronym (example: RAT) - /// * The last substring is the event reason (example: REG_UNAVAILABLE means - /// that register renaming couldn't find enough spare registers in the - /// register file). - /// - /// List of acronyms used for processor resoures: - /// RAT - Register Alias Table (used by the register renaming logic) - /// RCU - Retire Control Unit - /// SQ - Scheduler's Queue - /// LDQ - Load Queue - /// STQ - Store Queue - enum { - DS_RAT_REG_UNAVAILABLE, - DS_RCU_TOKEN_UNAVAILABLE, - DS_SQ_TOKEN_UNAVAILABLE, - DS_LDQ_TOKEN_UNAVAILABLE, - DS_STQ_TOKEN_UNAVAILABLE, - DS_DISPATCH_GROUP_RESTRICTION, - DS_LAST - }; - - // The DispatchUnit track dispatch stall events caused by unavailable - // of hardware resources. Events are classified based on the stall kind; - // so we have a counter for every source of dispatch stall. Counters are - // stored into a vector `DispatchStall` which is always of size DS_LAST. - std::vector DispatchStalls; - - bool checkRAT(const Instruction &Desc); - bool checkRCU(const InstrDesc &Desc); - bool checkScheduler(const InstrDesc &Desc); + bool checkRAT(unsigned Index, const Instruction &Desc); + bool checkRCU(unsigned Index, const InstrDesc &Desc); + bool checkScheduler(unsigned Index, const InstrDesc &Desc); void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); void notifyInstructionDispatched(unsigned IID); @@ -304,7 +272,7 @@ RAT(llvm::make_unique(MRI, RegisterFileSize)), RCU(llvm::make_unique(MicroOpBufferSize, MaxRetirePerCycle, this)), - Owner(B), DispatchStalls(DS_LAST, 0) {} + Owner(B) {} unsigned getDispatchWidth() const { return DispatchWidth; } @@ -314,10 +282,11 @@ bool isRCUEmpty() const { return RCU->isEmpty(); } - bool canDispatch(const Instruction &Inst) { + bool canDispatch(unsigned Index, const Instruction &Inst) { const InstrDesc &Desc = Inst.getDesc(); assert(isAvailable(Desc.NumMicroOps)); - return checkRCU(Desc) && checkRAT(Inst) && checkScheduler(Desc); + return checkRCU(Index, Desc) && checkRAT(Index, Inst) && + checkScheduler(Index, Desc); } unsigned dispatch(unsigned IID, Instruction *NewInst, @@ -327,24 +296,6 @@ unsigned RegID) const { return RAT->collectWrites(Vec, RegID); } - unsigned getNumRATStalls() const { - return DispatchStalls[DS_RAT_REG_UNAVAILABLE]; - } - unsigned getNumRCUStalls() const { - return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]; - } - unsigned getNumSQStalls() const { - return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]; - } - unsigned getNumLDQStalls() const { - return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]; - } - unsigned getNumSTQStalls() const { - return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]; - } - unsigned getNumDispatchGroupStalls() const { - return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]; - } unsigned getMaxUsedRegisterMappings(unsigned RegFileIndex = 0) const { return RAT->getMaxUsedRegisterMappings(RegFileIndex); } @@ -362,6 +313,8 @@ void notifyInstructionRetired(unsigned Index); + void notifyDispatchStall(unsigned Index, unsigned EventType); + void onInstructionExecuted(unsigned TokenID) { RCU->onInstructionExecuted(TokenID); } Index: llvm/trunk/tools/llvm-mca/Dispatch.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Dispatch.cpp +++ llvm/trunk/tools/llvm-mca/Dispatch.cpp @@ -280,56 +280,50 @@ } #endif -bool DispatchUnit::checkRAT(const Instruction &Instr) { - // Collect register definitions from the WriteStates. - SmallVector RegDefs; - - for (const std::unique_ptr &Def : Instr.getDefs()) - RegDefs.push_back(Def->getRegisterID()); - - unsigned RegisterMask = RAT->isAvailable(RegDefs); +bool DispatchUnit::checkRAT(unsigned Index, const Instruction &Instr) { + const InstrDesc &Desc = Instr.getDesc(); + unsigned NumWrites = Desc.Writes.size(); + unsigned RegisterMask = RAT->isAvailable(NumWrites); // A mask with all zeroes means: register files are available. if (RegisterMask) { - // TODO: We currently implement a single hardware counter for all the - // dispatch stalls caused by the unavailability of registers in one of the - // register files. In future, we want to let register files directly notify - // hardware listeners in the event of a dispatch stall. This would simplify - // the logic in Dispatch.[h/cpp], and move all the "hardware counting logic" - // into a View (for example: BackendStatistics). - DispatchStalls[DS_RAT_REG_UNAVAILABLE]++; + Owner->notifyStallEvent( + HWStallEvent(HWStallEvent::RegisterFileStall, Index)); return false; } return true; } -bool DispatchUnit::checkRCU(const InstrDesc &Desc) { +bool DispatchUnit::checkRCU(unsigned Index, const InstrDesc &Desc) { unsigned NumMicroOps = Desc.NumMicroOps; if (RCU->isAvailable(NumMicroOps)) return true; - DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]++; + Owner->notifyStallEvent( + HWStallEvent(HWStallEvent::RetireControlUnitStall, Index)); return false; } -bool DispatchUnit::checkScheduler(const InstrDesc &Desc) { +bool DispatchUnit::checkScheduler(unsigned Index, const InstrDesc &Desc) { // If this is a zero-latency instruction, then it bypasses // the scheduler. + HWStallEvent::GenericEventType Type = HWStallEvent::Invalid; switch (SC->canBeDispatched(Desc)) { case Scheduler::HWS_AVAILABLE: return true; case Scheduler::HWS_QUEUE_UNAVAILABLE: - DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]++; + Type = HWStallEvent::SchedulerQueueFull; break; case Scheduler::HWS_LD_QUEUE_UNAVAILABLE: - DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]++; + Type = HWStallEvent::LoadQueueFull; break; case Scheduler::HWS_ST_QUEUE_UNAVAILABLE: - DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]++; + Type = HWStallEvent::StoreQueueFull; break; case Scheduler::HWS_DISPATCH_GROUP_RESTRICTION: - DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]++; + Type = HWStallEvent::DispatchGroupStall; } + Owner->notifyStallEvent(HWStallEvent(Type, Index)); return false; } @@ -399,16 +393,6 @@ void DispatchUnit::dump() const { RAT->dump(); RCU->dump(); - - unsigned DSRAT = DispatchStalls[DS_RAT_REG_UNAVAILABLE]; - unsigned DSRCU = DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]; - unsigned DSSCHEDQ = DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]; - unsigned DSLQ = DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]; - unsigned DSSQ = DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]; - - dbgs() << "STALLS --- RAT: " << DSRAT << ", RCU: " << DSRCU - << ", SCHED_QUEUE: " << DSSCHEDQ << ", LOAD_QUEUE: " << DSLQ - << ", STORE_QUEUE: " << DSSQ << '\n'; } #endif Index: llvm/trunk/tools/llvm-mca/HWEventListener.h =================================================================== --- llvm/trunk/tools/llvm-mca/HWEventListener.h +++ llvm/trunk/tools/llvm-mca/HWEventListener.h @@ -68,6 +68,30 @@ const llvm::ArrayRef> UsedResources; }; +// A HWStallEvent represents a pipeline stall caused by the lack of hardware +// resources. +class HWStallEvent { +public: + enum GenericEventType { + Invalid = 0, + // Generic stall events generated by the DispatchUnit. + RegisterFileStall, + RetireControlUnitStall, + DispatchGroupStall, + SchedulerQueueFull, + LoadQueueFull, + StoreQueueFull, + LastGenericEvent + }; + + HWStallEvent(unsigned type, unsigned index) : Type(type), Index(index) {} + + // The exact meaning of the stall event type depends on the subtarget. + const unsigned Type; + // The index of the instruction in the source manager. + const unsigned Index; +}; + class HWEventListener { public: // Generic events generated by the backend pipeline. @@ -75,6 +99,7 @@ virtual void onCycleEnd(unsigned Cycle) {} virtual void onInstructionEvent(const HWInstructionEvent &Event) {} + virtual void onStallEvent(const HWStallEvent &Event) {} using ResourceRef = std::pair; virtual void onResourceAvailable(const ResourceRef &RRef) {}