diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -975,7 +975,6 @@ the ``IssueWidth`` parameter in LLVM's scheduling model. Once issued, an instruction is moved to ``IssuedInst`` set until it is ready to -retire. If ``RetireControlUnit`` is defined in the LLVM's scheduling model, -:program:`llvm-mca` ensures that instructions are retired in-order. However, an -instruction is allowed to retire out-of-order if ``RetireOOO`` property is true -for at least one of its writes. +retire. :program:`llvm-mca` ensures that writes are committed in-order. However, +an instruction is allowed to commit writes and retire out-of-order if +``RetireOOO`` property is true for at least one of its writes. diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h --- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h +++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h @@ -27,12 +27,10 @@ namespace mca { class RegisterFile; class ResourceManager; -struct RetireControlUnit; class InOrderIssueStage final : public Stage { const MCSchedModel &SM; const MCSubtargetInfo &STI; - RetireControlUnit &RCU; RegisterFile &PRF; std::unique_ptr RM; @@ -70,11 +68,10 @@ Error updateIssuedInst(); public: - InOrderIssueStage(RetireControlUnit &RCU, RegisterFile &PRF, - const MCSchedModel &SM, const MCSubtargetInfo &STI) - : SM(SM), STI(STI), RCU(RCU), PRF(PRF), - RM(std::make_unique(SM)), NumIssued(0), - StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {} + InOrderIssueStage(RegisterFile &PRF, const MCSchedModel &SM, + const MCSubtargetInfo &STI) + : SM(SM), STI(STI), PRF(PRF), RM(std::make_unique(SM)), + NumIssued(0), StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {} bool isAvailable(const InstRef &) const override; bool hasWorkToComplete() const override; diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h --- a/llvm/include/llvm/MCA/Stages/RetireStage.h +++ b/llvm/include/llvm/MCA/Stages/RetireStage.h @@ -27,7 +27,7 @@ class RetireStage final : public Stage { // Owner will go away when we move listeners/eventing to the stages. - RetireControlUnit &RCU; + RetireControlUnit *RCU; RegisterFile &PRF; LSUnitBase &LSU; SmallVector RetireInst; @@ -36,11 +36,11 @@ RetireStage &operator=(const RetireStage &Other) = delete; public: - RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS) + RetireStage(RetireControlUnit *R, RegisterFile &F, LSUnitBase &LS) : Stage(), RCU(R), PRF(F), LSU(LS) {} bool hasWorkToComplete() const override { - return !RCU.isEmpty() || !RetireInst.empty(); + return (RCU && !RCU->isEmpty()) || !RetireInst.empty(); } Error cycleStart() override; Error execute(InstRef &IR) override; diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp --- a/llvm/lib/MCA/Context.cpp +++ b/llvm/lib/MCA/Context.cpp @@ -48,7 +48,7 @@ *RCU, *PRF); auto Execute = std::make_unique(*HWS, Opts.EnableBottleneckAnalysis); - auto Retire = std::make_unique(*RCU, *PRF, *LSU); + auto Retire = std::make_unique(RCU.get(), *PRF, *LSU); // Pass the ownership of all the hardware units to this Context. addHardwareUnit(std::move(RCU)); @@ -71,21 +71,19 @@ std::unique_ptr Context::createInOrderPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) { const MCSchedModel &SM = STI.getSchedModel(); - auto RCU = std::make_unique(SM); auto PRF = std::make_unique(SM, MRI, Opts.RegisterFileSize); auto LSU = std::make_unique(SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias); auto Entry = std::make_unique(SrcMgr); - auto InOrderIssue = std::make_unique(*RCU, *PRF, SM, STI); - auto Retire = std::make_unique(*RCU, *PRF, *LSU); + auto InOrderIssue = std::make_unique(*PRF, SM, STI); + auto Retire = std::make_unique(/*RCU=*/nullptr, *PRF, *LSU); auto StagePipeline = std::make_unique(); StagePipeline->appendStage(std::move(Entry)); StagePipeline->appendStage(std::move(InOrderIssue)); StagePipeline->appendStage(std::move(Retire)); - addHardwareUnit(std::move(RCU)); addHardwareUnit(std::move(PRF)); addHardwareUnit(std::move(LSU)); diff --git a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp --- a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -23,6 +23,8 @@ : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), AvailableEntries(SM.isOutOfOrder() ? SM.MicroOpBufferSize : 0), MaxRetirePerCycle(0) { + assert(SM.isOutOfOrder() && + "RetireControlUnit is not available for in-order processors"); // Check if the scheduling model provides extra information about the machine // processor. If so, then use that information to set the reorder buffer size // and the maximum number of instructions retired per cycle. @@ -33,17 +35,12 @@ MaxRetirePerCycle = EPI.MaxRetirePerCycle; } NumROBEntries = AvailableEntries; - if (!SM.isOutOfOrder() && !NumROBEntries) - return; assert(NumROBEntries && "Invalid reorder buffer size!"); Queue.resize(2 * NumROBEntries); } // Reserves a number of slots, and returns a new token. unsigned RetireControlUnit::dispatch(const InstRef &IR) { - if (!NumROBEntries) - return UnhandledTokenID; - const Instruction &Inst = *IR.getInstruction(); unsigned Entries = normalizeQuantity(Inst.getNumMicroOps()); assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!"); diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp --- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp +++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp @@ -191,28 +191,11 @@ } llvm::Error InOrderIssueStage::execute(InstRef &IR) { - Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - - unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID; - if (!Desc.RetireOOO) - RCUTokenID = RCU.dispatch(IR); - IS.dispatch(RCUTokenID); - - if (Desc.EndGroup) { - Bandwidth = 0; - } else { - unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps(); - assert(Bandwidth >= NumMicroOps); - Bandwidth -= NumMicroOps; - } - if (llvm::Error E = tryIssue(IR, &StallCyclesLeft)) return E; if (StallCyclesLeft) { StalledInst = IR; - Bandwidth = 0; } return llvm::ErrorSuccess(); @@ -221,20 +204,26 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) { Instruction &IS = *IR.getInstruction(); unsigned SourceIndex = IR.getSourceIndex(); + const InstrDesc &Desc = IS.getDesc(); if (!canExecute(IR, StallCycles)) { LLVM_DEBUG(dbgs() << "[E] Stalled #" << IR << " for " << *StallCycles << " cycles\n"); + Bandwidth = 0; return llvm::ErrorSuccess(); } + unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID; + IS.dispatch(RCUTokenID); + SmallVector UsedRegs(PRF.getNumRegisterFiles()); addRegisterReadWrite(PRF, IS, SourceIndex, STI, UsedRegs); - notifyInstructionDispatch(IR, IS.getDesc().NumMicroOps, UsedRegs, *this); + unsigned NumMicroOps = IS.getNumMicroOps(); + notifyInstructionDispatch(IR, NumMicroOps, UsedRegs, *this); SmallVector, 4> UsedResources; - RM->issueInstruction(IS.getDesc(), UsedResources); + RM->issueInstruction(Desc, UsedResources); IS.execute(SourceIndex); // Replace resource masks with valid resource processor IDs. @@ -244,8 +233,15 @@ } notifyInstructionExecute(IR, UsedResources, *this); + if (Desc.EndGroup) { + Bandwidth = 0; + } else { + assert(Bandwidth >= NumMicroOps); + Bandwidth -= NumMicroOps; + } + IssuedInst.push_back(IR); - ++NumIssued; + NumIssued += NumMicroOps; if (!IR.getInstruction()->getDesc().RetireOOO) LastWriteBackCycle = findLastWriteBackCycle(IR); @@ -292,6 +288,7 @@ llvm::Error InOrderIssueStage::cycleStart() { NumIssued = 0; + Bandwidth = SM.IssueWidth; // Release consumed resources. SmallVector Freed; @@ -309,7 +306,6 @@ if (!StallCyclesLeft) { StalledInst.invalidate(); assert(NumIssued <= SM.IssueWidth && "Overflow."); - Bandwidth = SM.IssueWidth - NumIssued; } else { // The instruction is still stalled, cannot issue any new instructions in // this cycle. diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp --- a/llvm/lib/MCA/Stages/RetireStage.cpp +++ b/llvm/lib/MCA/Stages/RetireStage.cpp @@ -23,17 +23,19 @@ namespace mca { llvm::Error RetireStage::cycleStart() { - const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); - unsigned NumRetired = 0; - while (!RCU.isEmpty()) { - if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) - break; - const RetireControlUnit::RUToken &Current = RCU.getCurrentToken(); - if (!Current.Executed) - break; - notifyInstructionRetired(Current.IR); - RCU.consumeCurrentToken(); - NumRetired++; + if (RCU) { + const unsigned MaxRetirePerCycle = RCU->getMaxRetirePerCycle(); + unsigned NumRetired = 0; + while (!RCU->isEmpty()) { + if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) + break; + const RetireControlUnit::RUToken &Current = RCU->getCurrentToken(); + if (!Current.Executed) + break; + notifyInstructionRetired(Current.IR); + RCU->consumeCurrentToken(); + NumRetired++; + } } // Retire instructions that are not controlled by the RCU @@ -51,7 +53,9 @@ unsigned TokenID = IS.getRCUTokenID(); if (TokenID != RetireControlUnit::UnhandledTokenID) { - RCU.onInstructionExecuted(TokenID); + assert(RCU && + "RCU must be available for an instruction with a valid TokenID"); + RCU->onInstructionExecuted(TokenID); return llvm::ErrorSuccess(); } diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td --- a/llvm/lib/Target/AArch64/AArch64SchedA55.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -339,5 +339,4 @@ def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; -def A55RCU : RetireControlUnit<64, 0>; } diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s @@ -57,17 +57,6 @@ # CHECK: Scheduler's queue usage: # CHECK-NEXT: No scheduler resources used. -# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: -# CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 14 (66.7%) -# CHECK-NEXT: 1, 4 (19.0%) -# CHECK-NEXT: 2, 1 (4.8%) -# CHECK-NEXT: 3, 2 (9.5%) - -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) - # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 14 # CHECK-NEXT: Max number of mappings used: 6 diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s @@ -57,17 +57,6 @@ # CHECK: Scheduler's queue usage: # CHECK-NEXT: No scheduler resources used. -# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: -# CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 14 (66.7%) -# CHECK-NEXT: 1, 4 (19.0%) -# CHECK-NEXT: 2, 1 (4.8%) -# CHECK-NEXT: 3, 2 (9.5%) - -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) - # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 14 # CHECK-NEXT: Max number of mappings used: 6 diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s @@ -57,17 +57,6 @@ # CHECK: Scheduler's queue usage: # CHECK-NEXT: No scheduler resources used. -# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: -# CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 14 (70.0%) -# CHECK-NEXT: 1, 2 (10.0%) -# CHECK-NEXT: 2, 2 (10.0%) -# CHECK-NEXT: 3, 2 (10.0%) - -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) - # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 12 # CHECK-NEXT: Max number of mappings used: 7 diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s @@ -55,16 +55,6 @@ # CHECK: Scheduler's queue usage: # CHECK-NEXT: No scheduler resources used. -# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: -# CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 18 (72.0%) -# CHECK-NEXT: 1, 2 (8.0%) -# CHECK-NEXT: 2, 5 (20.0%) - -# CHECK: Total ROB Entries: 64 -# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% ) - # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 12 # CHECK-NEXT: Max number of mappings used: 7 diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp --- a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp +++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/RetireControlUnitStatistics.h" +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" #include "llvm/Support/Format.h" namespace llvm { @@ -71,16 +72,25 @@ } unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles; - double MaxUsagePercentage = ((double)MaxUsedEntries / TotalROBEntries) * 100.0; - double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10; - double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0; - double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10; + double NormalizedMaxPercentage = 0.0; + double NormalizedAvgPercentage = 0.0; + if (TotalROBEntries) { + double MaxUsagePercentage = + ((double)MaxUsedEntries / TotalROBEntries) * 100.0; + NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10; + double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0; + NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10; + } TempStream << "\nTotal ROB Entries: " << TotalROBEntries - << "\nMax Used ROB Entries: " << MaxUsedEntries - << format(" ( %.1f%% )", NormalizedMaxPercentage) - << "\nAverage Used ROB Entries per cy: " << AvgUsage - << format(" ( %.1f%% )\n", NormalizedAvgPercentage); + << "\nMax Used ROB Entries: " << MaxUsedEntries; + if (TotalROBEntries) + TempStream << format(" ( %.1f%% )", NormalizedMaxPercentage); + TempStream << "\nAverage Used ROB Entries per cy: " << AvgUsage; + if (TotalROBEntries) + TempStream << format(" ( %.1f%% )\n", NormalizedAvgPercentage); + else + TempStream << '\n'; TempStream.flush(); OS << Buffer; diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -278,7 +278,8 @@ processOptionImpl(PrintRegisterFileStats, Default); processOptionImpl(PrintDispatchStats, Default); processOptionImpl(PrintSchedulerStats, Default); - processOptionImpl(PrintRetireStats, Default); + if (IsOutOfOrder) + processOptionImpl(PrintRetireStats, Default); } // Returns true on success.