Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -24,6 +24,7 @@
   llvm-mca.cpp
   RegisterFileStatistics.cpp
   ResourcePressureView.cpp
+  RetireControlUnit.cpp
   RetireControlUnitStatistics.cpp
   Scheduler.cpp
   SchedulerStatistics.cpp
Index: Dispatch.h
===================================================================
--- Dispatch.h
+++ Dispatch.h
@@ -17,6 +17,7 @@
 #define LLVM_TOOLS_LLVM_MCA_DISPATCH_H
 
 #include "Instruction.h"
+#include "RetireControlUnit.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include <map>
@@ -155,68 +156,6 @@
 #endif
 };
 
-/// tracks which instructions are in-flight (i.e. dispatched but not
-/// retired) in the OoO backend.
-///
-/// This class checks on every cycle if/which instructions can be retired.
-/// Instructions are retired in program order.
-/// In the event of instruction retired, the DispatchUnit object that owns
-/// this RetireControlUnit gets notified.
-/// On instruction retired, register updates are all architecturally
-/// committed, and any temporary registers originally allocated for the
-/// retired instruction are freed.
-struct RetireControlUnit {
-  // A "token" (object of class RUToken) is created by the retire unit for every
-  // instruction dispatched to the schedulers. Flag 'Executed' is used to
-  // quickly check if an instruction has reached the write-back stage. A token
-  // also carries information related to the number of entries consumed by the
-  // instruction in the reorder buffer. The idea is that those entries will
-  // become available again once the instruction is retired. On every cycle,
-  // the RCU (Retire Control Unit) scans every token starting to search for
-  // instructions that are ready to retire. retired. Instructions are retired
-  // in program order. Only 'Executed' instructions are eligible for retire.
-  // Note that the size of the reorder buffer is defined by the scheduling model
-  // via field 'NumMicroOpBufferSize'.
-  struct RUToken {
-    unsigned Index;    // Instruction index.
-    unsigned NumSlots; // Slots reserved to this instruction.
-    bool Executed;     // True if the instruction is past the WB stage.
-  };
-
-private:
-  unsigned NextAvailableSlotIdx;
-  unsigned CurrentInstructionSlotIdx;
-  unsigned AvailableSlots;
-  unsigned MaxRetirePerCycle; // 0 means no limit.
-  std::vector<RUToken> Queue;
-  DispatchUnit *Owner;
-
-public:
-  RetireControlUnit(const llvm::MCSchedModel &SM, DispatchUnit *DU);
-
-  bool isFull() const { return !AvailableSlots; }
-  bool isEmpty() const { return AvailableSlots == Queue.size(); }
-  bool isAvailable(unsigned Quantity = 1) const {
-    // Some instructions may declare a number of uOps which exceedes the size
-    // of the reorder buffer. To avoid problems, cap the amount of slots to
-    // the size of the reorder buffer.
-    Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
-    return AvailableSlots >= Quantity;
-  }
-
-  // Reserves a number of slots, and returns a new token.
-  unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);
-
-  /// Retires instructions in program order.
-  void cycleEvent();
-
-  void onInstructionExecuted(unsigned TokenID);
-
-#ifndef NDEBUG
-  void dump() const;
-#endif
-};
-
 // Implements the hardware dispatch logic.
 //
 // This class is responsible for the dispatch stage, in which instructions are
Index: Dispatch.cpp
===================================================================
--- Dispatch.cpp
+++ Dispatch.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 /// \file
 ///
-/// This file implements methods declared by class RegisterFile, DispatchUnit
-/// and RetireControlUnit.
+/// This file implements methods declared by class RegisterFile and
+/// DispatchUnit.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -252,41 +252,6 @@
 }
 #endif
 
-RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM,
-                                     DispatchUnit *DU)
-    : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
-      AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DU) {
-  // Check if the scheduling model provides extra information about the machine
-  // processor. If so, then use that information to set the reorder buffer size
-  // and the maximum number of instructions retired per cycle.
-  if (SM.hasExtraProcessorInfo()) {
-    const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
-    if (EPI.ReorderBufferSize)
-      AvailableSlots = EPI.ReorderBufferSize;
-    MaxRetirePerCycle = EPI.MaxRetirePerCycle;
-  }
-
-  assert(AvailableSlots && "Invalid reorder buffer size!");
-  Queue.resize(AvailableSlots);
-}
-
-// Reserves a number of slots, and returns a new token.
-unsigned RetireControlUnit::reserveSlot(unsigned Index, unsigned NumMicroOps) {
-  assert(isAvailable(NumMicroOps));
-  unsigned NormalizedQuantity =
-      std::min(NumMicroOps, static_cast<unsigned>(Queue.size()));
-  // Zero latency instructions may have zero mOps. Artificially bump this
-  // value to 1. Although zero latency instructions don't consume scheduler
-  // resources, they still consume one slot in the retire queue.
-  NormalizedQuantity = std::max(NormalizedQuantity, 1U);
-  unsigned TokenID = NextAvailableSlotIdx;
-  Queue[NextAvailableSlotIdx] = {Index, NormalizedQuantity, false};
-  NextAvailableSlotIdx += NormalizedQuantity;
-  NextAvailableSlotIdx %= Queue.size();
-  AvailableSlots -= NormalizedQuantity;
-  return TokenID;
-}
-
 void DispatchUnit::notifyInstructionDispatched(unsigned Index,
                                                ArrayRef<unsigned> UsedRegs) {
   DEBUG(dbgs() << "[E] Instruction Dispatched: " << Index << '\n');
@@ -304,39 +269,6 @@
   Owner->eraseInstruction(Index);
 }
 
-void RetireControlUnit::cycleEvent() {
-  if (isEmpty())
-    return;
-
-  unsigned NumRetired = 0;
-  while (!isEmpty()) {
-    if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle)
-      break;
-    RUToken &Current = Queue[CurrentInstructionSlotIdx];
-    assert(Current.NumSlots && "Reserved zero slots?");
-    if (!Current.Executed)
-      break;
-    Owner->notifyInstructionRetired(Current.Index);
-    CurrentInstructionSlotIdx += Current.NumSlots;
-    CurrentInstructionSlotIdx %= Queue.size();
-    AvailableSlots += Current.NumSlots;
-    NumRetired++;
-  }
-}
-
-void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
-  assert(Queue.size() > TokenID);
-  assert(Queue[TokenID].Executed == false && Queue[TokenID].Index != ~0U);
-  Queue[TokenID].Executed = true;
-}
-
-#ifndef NDEBUG
-void RetireControlUnit::dump() const {
-  dbgs() << "Retire Unit: { Total Slots=" << Queue.size()
-         << ", Available Slots=" << AvailableSlots << " }\n";
-}
-#endif
-
 bool DispatchUnit::checkRAT(unsigned Index, const Instruction &Instr) {
   SmallVector<unsigned, 4> RegDefs;
   for (const std::unique_ptr<WriteState> &RegDef : Instr.getDefs())
Index: RetireControlUnit.h
===================================================================
--- RetireControlUnit.h
+++ RetireControlUnit.h
@@ -0,0 +1,104 @@
+//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares the RetireControlUnit, which tracks in-flight
+/// instructions and retires them in program order.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
+
+#include "llvm/MC/MCSchedule.h"
+#include <algorithm>
+#include <vector>
+
+namespace mca {
+
+class DispatchUnit;
+
+/// This class tracks which instructions are in-flight (i.e., dispatched but not
+/// retired) in the OoO backend.
+///
+/// This class checks on every cycle if/which instructions can be retired.
+/// Instructions are retired in program order.
+/// When an instruction is retired, the DispatchUnit object that owns
+/// this RetireControlUnit (RCU) gets notified.
+/// On instruction retired, register updates are all architecturally
+/// committed, and any temporary registers originally allocated for the
+/// retired instruction are freed.
+struct RetireControlUnit {
+  // A RUToken is created by the RCU for every instruction dispatched to the
+  // schedulers. These "tokens" are managed by the RCU in its token Queue.
+  //
+  // On every cycle ('cycleEvent'), the RCU walks the token queue starting
+  // from the token at 'CurrentInstructionSlotIdx'. A token whose 'Executed'
+  // flag is set describes an instruction that has reached the write-back
+  // stage, and that instruction is retired. The walk stops at the first token
+  // whose 'Executed' flag is not set.
+  //
+  // 'NumSlots' represents the number of entries consumed by the instruction in
+  // the reorder buffer. Those entries will become available again once the
+  // instruction is retired.
+  //
+  // Note that the size of the reorder buffer is defined by the scheduling
+  // model via field 'MicroOpBufferSize', unless the extra processor info
+  // provides a non-zero 'ReorderBufferSize'.
+  struct RUToken {
+    unsigned Index;    // Instruction index.
+    unsigned NumSlots; // Slots reserved to this instruction.
+    bool Executed;     // True if the instruction is past the WB stage.
+  };
+
+private:
+  unsigned NextAvailableSlotIdx;
+  unsigned CurrentInstructionSlotIdx;
+  unsigned AvailableSlots;
+  unsigned MaxRetirePerCycle; // 0 means no limit.
+  std::vector<RUToken> Queue;
+  DispatchUnit *Owner;
+
+public:
+  RetireControlUnit(const llvm::MCSchedModel &SM, DispatchUnit *DU);
+
+  /// Returns true if no slot is available.
+  bool isFull() const { return !AvailableSlots; }
+  /// Returns true if no slot is currently reserved.
+  bool isEmpty() const { return AvailableSlots == Queue.size(); }
+  /// Returns true if 'Quantity' slots can be reserved.
+  bool isAvailable(unsigned Quantity = 1) const {
+    // Some instructions may declare a number of uOps which exceeds the size
+    // of the reorder buffer. To avoid problems, cap the amount of slots to
+    // the size of the reorder buffer.
+    Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+    // Instructions that declare zero uOps still consume one slot. Normalize
+    // the quantity the same way reserveSlot() does, so that a zero-uOp
+    // instruction is not accepted when the buffer is full.
+    Quantity = std::max(Quantity, 1U);
+    return AvailableSlots >= Quantity;
+  }
+
+  /// Reserves a number of slots, and returns a new token.
+  unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);
+
+  /// Retires instructions in program order.
+  void cycleEvent();
+
+  /// Flags the token identified by 'TokenID' as executed.
+  void onInstructionExecuted(unsigned TokenID);
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // namespace mca
+
+#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
Index: RetireControlUnit.cpp
===================================================================
--- RetireControlUnit.cpp
+++ RetireControlUnit.cpp
@@ -0,0 +1,98 @@
+//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods declared by class RetireControlUnit.
+///
+//===----------------------------------------------------------------------===//
+
+#include "RetireControlUnit.h"
+#include "Dispatch.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace mca {
+
+RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM,
+                                     DispatchUnit *DU)
+    : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
+      AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DU) {
+  // Check if the scheduling model provides extra information about the machine
+  // processor. If so, then use that information to set the reorder buffer size
+  // and the maximum number of instructions retired per cycle.
+  if (SM.hasExtraProcessorInfo()) {
+    const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+    if (EPI.ReorderBufferSize)
+      AvailableSlots = EPI.ReorderBufferSize;
+    MaxRetirePerCycle = EPI.MaxRetirePerCycle;
+  }
+
+  assert(AvailableSlots && "Invalid reorder buffer size!");
+  Queue.resize(AvailableSlots);
+}
+
+// Reserves a number of slots, and returns a new token.
+unsigned RetireControlUnit::reserveSlot(unsigned Index, unsigned NumMicroOps) {
+  assert(isAvailable(NumMicroOps));
+  unsigned NormalizedQuantity =
+      std::min(NumMicroOps, static_cast<unsigned>(Queue.size()));
+  // Zero latency instructions may have zero mOps. Artificially bump this
+  // value to 1. Although zero latency instructions don't consume scheduler
+  // resources, they still consume one slot in the retire queue.
+  NormalizedQuantity = std::max(NormalizedQuantity, 1U);
+  unsigned TokenID = NextAvailableSlotIdx;
+  Queue[NextAvailableSlotIdx] = {Index, NormalizedQuantity, false};
+  NextAvailableSlotIdx += NormalizedQuantity;
+  NextAvailableSlotIdx %= Queue.size();
+  AvailableSlots -= NormalizedQuantity;
+  return TokenID;
+}
+
+// Retires executed instructions in program order, up to 'MaxRetirePerCycle'
+// instructions per call when that limit is non-zero.
+void RetireControlUnit::cycleEvent() {
+  unsigned NumRetired = 0;
+  while (!isEmpty()) {
+    if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle)
+      break;
+    RUToken &Current = Queue[CurrentInstructionSlotIdx];
+    assert(Current.NumSlots && "Reserved zero slots?");
+    // Stop at the first token that has not executed yet: younger tokens
+    // cannot retire before it.
+    if (!Current.Executed)
+      break;
+    Owner->notifyInstructionRetired(Current.Index);
+    CurrentInstructionSlotIdx += Current.NumSlots;
+    CurrentInstructionSlotIdx %= Queue.size();
+    AvailableSlots += Current.NumSlots;
+    ++NumRetired;
+  }
+}
+
+// Flags the token identified by 'TokenID' as executed.
+void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
+  assert(Queue.size() > TokenID);
+  assert(Queue[TokenID].Executed == false && Queue[TokenID].Index != ~0U);
+  Queue[TokenID].Executed = true;
+}
+
+#ifndef NDEBUG
+void RetireControlUnit::dump() const {
+  dbgs() << "Retire Unit: { Total Slots=" << Queue.size()
+         << ", Available Slots=" << AvailableSlots << " }\n";
+}
+#endif
+
+} // namespace mca