diff --git a/llvm/include/llvm/MCA/HWEventListener.h b/llvm/include/llvm/MCA/HWEventListener.h --- a/llvm/include/llvm/MCA/HWEventListener.h +++ b/llvm/include/llvm/MCA/HWEventListener.h @@ -72,9 +72,9 @@ class HWInstructionDispatchedEvent : public HWInstructionEvent { public: HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef Regs, - unsigned UOps) + unsigned UOps, unsigned RCUTokenID) : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), - UsedPhysRegs(Regs), MicroOpcodes(UOps) {} + UsedPhysRegs(Regs), MicroOpcodes(UOps), RCUTokenID(RCUTokenID) {} // Number of physical register allocated for this instruction. There is one // entry per register file. ArrayRef UsedPhysRegs; @@ -88,16 +88,21 @@ // cycle), and each event would declare how many micro opcodes are effectively // been dispatched to the schedulers. unsigned MicroOpcodes; + // RetireControlUnit token that is assigned to the instruction. + unsigned RCUTokenID; }; class HWInstructionRetiredEvent : public HWInstructionEvent { public: - HWInstructionRetiredEvent(const InstRef &IR, ArrayRef Regs) + HWInstructionRetiredEvent(const InstRef &IR, ArrayRef Regs, + unsigned RCUTokenID) : HWInstructionEvent(HWInstructionEvent::Retired, IR), - FreedPhysRegs(Regs) {} + FreedPhysRegs(Regs), RCUTokenID(RCUTokenID) {} // Number of register writes that have been architecturally committed. There // is one entry per register file. ArrayRef FreedPhysRegs; + // RetireControlUnit token that was assigned to the instruction. + unsigned RCUTokenID; }; // A HWStallEvent represents a pipeline stall caused by the lack of hardware diff --git a/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h --- a/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h @@ -93,6 +93,9 @@ // Return the current token from the RCU's circular token queue. const RUToken &getCurrentToken() const; + // Return the ID of the current token returned by getCurrentToken(). + unsigned getCurrentTokenId() const { return CurrentInstructionSlotIdx; }; + const RUToken &peekNextToken() const; // Advance the pointer to the next token in the circular token queue. diff --git a/llvm/include/llvm/MCA/Stages/DispatchStage.h b/llvm/include/llvm/MCA/Stages/DispatchStage.h --- a/llvm/include/llvm/MCA/Stages/DispatchStage.h +++ b/llvm/include/llvm/MCA/Stages/DispatchStage.h @@ -62,7 +62,7 @@ void notifyInstructionDispatched(const InstRef &IR, ArrayRef UsedPhysRegs, - unsigned uOps) const; + unsigned uOps, unsigned RCUTokenID) const; public: DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI, diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h --- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h +++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h @@ -27,12 +27,10 @@ namespace mca { class RegisterFile; class ResourceManager; -struct RetireControlUnit; class InOrderIssueStage final : public Stage { const MCSchedModel &SM; const MCSubtargetInfo &STI; - RetireControlUnit &RCU; RegisterFile &PRF; std::unique_ptr RM; @@ -66,9 +64,9 @@ Error updateIssuedInst(); public: - InOrderIssueStage(RetireControlUnit &RCU, RegisterFile &PRF, + InOrderIssueStage(RegisterFile &PRF, const MCSchedModel &SM, const MCSubtargetInfo &STI) - : SM(SM), STI(STI), RCU(RCU), PRF(PRF), + : SM(SM), STI(STI), PRF(PRF), RM(std::make_unique(SM)), NumIssued(0), StallCyclesLeft(0), Bandwidth(0) {} diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h --- a/llvm/include/llvm/MCA/Stages/RetireStage.h +++ b/llvm/include/llvm/MCA/Stages/RetireStage.h @@ -44,7 +44,7 @@ } Error cycleStart() override; Error execute(InstRef &IR) override; - void notifyInstructionRetired(const InstRef &IR) const; + void notifyInstructionRetired(const InstRef &IR, unsigned RCUTokenID) const; }; } // namespace mca diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp --- a/llvm/lib/MCA/Context.cpp +++ b/llvm/lib/MCA/Context.cpp @@ -77,7 +77,7 @@ Opts.StoreQueueSize, Opts.AssumeNoAlias); auto Entry = std::make_unique(SrcMgr); - auto InOrderIssue = std::make_unique(*RCU, *PRF, SM, STI); + auto InOrderIssue = std::make_unique(*PRF, SM, STI); auto Retire = std::make_unique(*RCU, *PRF, *LSU); auto StagePipeline = std::make_unique(); diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp --- a/llvm/lib/MCA/Stages/DispatchStage.cpp +++ b/llvm/lib/MCA/Stages/DispatchStage.cpp @@ -37,10 +37,11 @@ void DispatchStage::notifyInstructionDispatched(const InstRef &IR, ArrayRef UsedRegs, - unsigned UOps) const { + unsigned UOps, + unsigned RCUTokenID) const { LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); notifyEvent( - HWInstructionDispatchedEvent(IR, UsedRegs, UOps)); + HWInstructionDispatchedEvent(IR, UsedRegs, UOps, RCUTokenID)); } bool DispatchStage::checkPRF(const InstRef &IR) const { @@ -131,7 +132,7 @@ // Notify listeners of the "instruction dispatched" event, // and move IR to the next stage. notifyInstructionDispatched(IR, RegisterFiles, - std::min(DispatchWidth, NumMicroOps)); + std::min(DispatchWidth, NumMicroOps), RCUTokenID); return moveToTheNextStage(IR); } @@ -149,7 +150,8 @@ assert(CarriedOver && "Invalid dispatched instruction"); SmallVector RegisterFiles(PRF.getNumRegisterFiles(), 0U); - notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes); + notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes, + RetireControlUnit::UnhandledTokenID); if (!CarryOver) CarriedOver = InstRef(); return ErrorSuccess(); diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp --- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp +++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp @@ -187,37 +187,20 @@ static void notifyInstructionDispatch(const InstRef &IR, unsigned Ops, const SmallVectorImpl &UsedRegs, - const Stage &S) { + unsigned RCUTokenID, const Stage &S) { S.notifyEvent( - HWInstructionDispatchedEvent(IR, UsedRegs, Ops)); + HWInstructionDispatchedEvent(IR, UsedRegs, Ops, RCUTokenID)); LLVM_DEBUG(dbgs() << "[E] Dispatched #" << IR << "\n"); } llvm::Error InOrderIssueStage::execute(InstRef &IR) { - Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - - unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID; - if (!Desc.RetireOOO) - RCUTokenID = RCU.dispatch(IR); - IS.dispatch(RCUTokenID); - - if (Desc.EndGroup) { - Bandwidth = 0; - } else { - unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps(); - assert(Bandwidth >= NumMicroOps); - Bandwidth -= NumMicroOps; - } - if (llvm::Error E = tryIssue(IR, &StallCyclesLeft)) return E; if (StallCyclesLeft) { StalledInst = IR; - Bandwidth = 0; } return llvm::ErrorSuccess(); @@ -226,20 +209,26 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) { Instruction &IS = *IR.getInstruction(); unsigned SourceIndex = IR.getSourceIndex(); + const InstrDesc &Desc = IS.getDesc(); if (!canExecute(IR, StallCycles)) { LLVM_DEBUG(dbgs() << "[E] Stalled #" << IR << " for " << *StallCycles << " cycles\n"); + Bandwidth = 0; return llvm::ErrorSuccess(); } + unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID; + IS.dispatch(RCUTokenID); + SmallVector UsedRegs(PRF.getNumRegisterFiles()); addRegisterReadWrite(PRF, IS, SourceIndex, STI, UsedRegs); - notifyInstructionDispatch(IR, IS.getDesc().NumMicroOps, UsedRegs, *this); + unsigned NumMicroOps = IS.getNumMicroOps(); + notifyInstructionDispatch(IR, NumMicroOps, UsedRegs, RCUTokenID, *this); SmallVector, 4> UsedResources; - RM->issueInstruction(IS.getDesc(), UsedResources); + RM->issueInstruction(Desc, UsedResources); IS.execute(SourceIndex); // Replace resource masks with valid resource processor IDs. @@ -249,9 +238,16 @@ } notifyInstructionExecute(IR, UsedResources, *this); + if (Desc.EndGroup) { + Bandwidth = 0; + } else { + assert(Bandwidth >= NumMicroOps); + Bandwidth -= NumMicroOps; + } + IssuedInst.push_back(IR); LastIssuedInst = IR; - ++NumIssued; + NumIssued += NumMicroOps; return llvm::ErrorSuccess(); } @@ -295,6 +291,7 @@ llvm::Error InOrderIssueStage::cycleStart() { NumIssued = 0; + Bandwidth = SM.IssueWidth; // Release consumed resources. SmallVector Freed; @@ -312,7 +309,6 @@ if (!StallCyclesLeft) { StalledInst.invalidate(); assert(NumIssued <= SM.IssueWidth && "Overflow."); - Bandwidth = SM.IssueWidth - NumIssued; } else { // The instruction is still stalled, cannot issue any new instructions in // this cycle. diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp --- a/llvm/lib/MCA/Stages/RetireStage.cpp +++ b/llvm/lib/MCA/Stages/RetireStage.cpp @@ -31,7 +31,7 @@ const RetireControlUnit::RUToken &Current = RCU.getCurrentToken(); if (!Current.Executed) break; - notifyInstructionRetired(Current.IR); + notifyInstructionRetired(Current.IR, RCU.getCurrentTokenId()); RCU.consumeCurrentToken(); NumRetired++; } @@ -39,7 +39,7 @@ // Retire instructions that are not controlled by the RCU for (InstRef &IR : RetireInst) { IR.getInstruction()->retire(); - notifyInstructionRetired(IR); + notifyInstructionRetired(IR, RetireControlUnit::UnhandledTokenID); } RetireInst.resize(0); @@ -59,7 +59,8 @@ return llvm::ErrorSuccess(); } -void RetireStage::notifyInstructionRetired(const InstRef &IR) const { +void RetireStage::notifyInstructionRetired(const InstRef &IR, + unsigned RCUTokenID) const { LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n'); llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); const Instruction &Inst = *IR.getInstruction(); @@ -70,7 +71,8 @@ for (const WriteState &WS : Inst.getDefs()) PRF.removeRegisterWrite(WS, FreedRegs); - notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); + notifyEvent( + HWInstructionRetiredEvent(IR, FreedRegs, RCUTokenID)); } } // namespace mca diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td --- a/llvm/lib/Target/AArch64/AArch64SchedA55.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -339,5 +339,4 @@ def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; -def A55RCU : RetireControlUnit<64, 0>; } diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s @@ -64,9 +64,9 @@ # CHECK-NEXT: 2, 1 (4.8%) # CHECK-NEXT: 3, 2 (9.5%) -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) +# CHECK: Total ROB Entries: 0 +# CHECK-NEXT: Max Used ROB Entries: 0 +# CHECK-NEXT: Average Used ROB Entries per cy: 0 # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 14 diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s @@ -64,9 +64,9 @@ # CHECK-NEXT: 2, 1 (4.8%) # CHECK-NEXT: 3, 2 (9.5%) -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) +# CHECK: Total ROB Entries: 0 +# CHECK-NEXT: Max Used ROB Entries: 0 +# CHECK-NEXT: Average Used ROB Entries per cy: 0 # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 14 diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s @@ -64,9 +64,9 @@ # CHECK-NEXT: 2, 2 (10.0%) # CHECK-NEXT: 3, 2 (10.0%) -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) +# CHECK: Total ROB Entries: 0 +# CHECK-NEXT: Max Used ROB Entries: 0 +# CHECK-NEXT: Average Used ROB Entries per cy: 0 # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 12 diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s @@ -61,9 +61,9 @@ # CHECK-NEXT: 1, 2 (8.0%) # CHECK-NEXT: 2, 5 (20.0%) -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) +# CHECK: Total ROB Entries: 0 +# CHECK-NEXT: Max Used ROB Entries: 0 +# CHECK-NEXT: Average Used ROB Entries per cy: 0 # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 12 diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp --- a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp +++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/RetireControlUnitStatistics.h" +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" #include "llvm/Support/Format.h" namespace llvm { @@ -30,16 +31,21 @@ void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) { if (Event.Type == HWInstructionEvent::Dispatched) { - unsigned NumEntries = - static_cast(Event).MicroOpcodes; + const auto &Ev = static_cast(Event); + if (Ev.RCUTokenID == RetireControlUnit::UnhandledTokenID) + return; + unsigned NumEntries = Ev.MicroOpcodes; EntriesInUse += NumEntries; } if (Event.Type == HWInstructionEvent::Retired) { + ++NumRetired; + const auto &Ev = static_cast(Event); + if (Ev.RCUTokenID == RetireControlUnit::UnhandledTokenID) + return; unsigned ReleasedEntries = Event.IR.getInstruction()->getDesc().NumMicroOps; assert(EntriesInUse >= ReleasedEntries && "Invalid internal state!"); EntriesInUse -= ReleasedEntries; - ++NumRetired; } } @@ -71,16 +77,25 @@ } unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles; - double MaxUsagePercentage = ((double)MaxUsedEntries / TotalROBEntries) * 100.0; - double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10; - double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0; - double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10; + double NormalizedMaxPercentage = 0.0; + double NormalizedAvgPercentage = 0.0; + if (TotalROBEntries) { + double MaxUsagePercentage = + ((double)MaxUsedEntries / TotalROBEntries) * 100.0; + NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10; + double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0; + NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10; + } TempStream << "\nTotal ROB Entries: " << TotalROBEntries - << "\nMax Used ROB Entries: " << MaxUsedEntries - << format(" ( %.1f%% )", NormalizedMaxPercentage) - << "\nAverage Used ROB Entries per cy: " << AvgUsage - << format(" ( %.1f%% )\n", NormalizedAvgPercentage); + << "\nMax Used ROB Entries: " << MaxUsedEntries; + if (TotalROBEntries) + TempStream << format(" ( %.1f%% )", NormalizedMaxPercentage); + TempStream << "\nAverage Used ROB Entries per cy: " << AvgUsage; + if (TotalROBEntries) + TempStream << format(" ( %.1f%% )\n", NormalizedAvgPercentage); + else + TempStream << '\n'; TempStream.flush(); OS << Buffer;