Index: llvm/trunk/tools/llvm-mca/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-mca/CMakeLists.txt +++ llvm/trunk/tools/llvm-mca/CMakeLists.txt @@ -1,3 +1,5 @@ +include_directories(include) + set(LLVM_LINK_COMPONENTS AllTargetsAsmPrinters AllTargetsAsmParsers @@ -12,25 +14,7 @@ add_llvm_tool(llvm-mca llvm-mca.cpp CodeRegion.cpp - Context.cpp - DispatchStage.cpp - ExecuteStage.cpp - FetchStage.cpp - HWEventListener.cpp - HardwareUnit.cpp - InstrBuilder.cpp - Instruction.cpp - InstructionTables.cpp - LSUnit.cpp - Pipeline.cpp PipelinePrinter.cpp - RegisterFile.cpp - ResourceManager.cpp - RetireControlUnit.cpp - RetireStage.cpp - Scheduler.cpp - Stage.cpp - Support.cpp Views/DispatchStatistics.cpp Views/InstructionInfoView.cpp Views/RegisterFileStatistics.cpp @@ -41,3 +25,7 @@ Views/TimelineView.cpp Views/View.cpp ) + +set(LLVM_MCA_SOURCE_DIR ${CURRENT_SOURCE_DIR}) +add_subdirectory(lib) +target_link_libraries(llvm-mca PRIVATE LLVMMCA) Index: llvm/trunk/tools/llvm-mca/Context.h =================================================================== --- llvm/trunk/tools/llvm-mca/Context.h +++ llvm/trunk/tools/llvm-mca/Context.h @@ -1,68 +0,0 @@ -//===---------------------------- Context.h ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a class for holding ownership of various simulated -/// hardware units. A Context also provides a utility routine for constructing -/// a default out-of-order pipeline with fetch, dispatch, execute, and retire -/// stages. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_CONTEXT_H -#define LLVM_TOOLS_LLVM_MCA_CONTEXT_H -#include "HardwareUnit.h" -#include "InstrBuilder.h" -#include "Pipeline.h" -#include "SourceMgr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include - -namespace mca { - -/// This is a convenience struct to hold the parameters necessary for creating -/// the pre-built "default" out-of-order pipeline. -struct PipelineOptions { - PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS, - bool NoAlias) - : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS), - StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} - unsigned DispatchWidth; - unsigned RegisterFileSize; - unsigned LoadQueueSize; - unsigned StoreQueueSize; - bool AssumeNoAlias; -}; - -class Context { - llvm::SmallVector, 4> Hardware; - const llvm::MCRegisterInfo &MRI; - const llvm::MCSubtargetInfo &STI; - -public: - Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S) - : MRI(R), STI(S) {} - Context(const Context &C) = delete; - Context &operator=(const Context &C) = delete; - - void addHardwareUnit(std::unique_ptr H) { - Hardware.push_back(std::move(H)); - } - - /// Construct a basic pipeline for simulating an out-of-order pipeline. - /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. 
- std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, - InstrBuilder &IB, - SourceMgr &SrcMgr); -}; - -} // namespace mca -#endif // LLVM_TOOLS_LLVM_MCA_CONTEXT_H Index: llvm/trunk/tools/llvm-mca/Context.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Context.cpp +++ llvm/trunk/tools/llvm-mca/Context.cpp @@ -1,65 +0,0 @@ -//===---------------------------- Context.cpp -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a class for holding ownership of various simulated -/// hardware units. A Context also provides a utility routine for constructing -/// a default out-of-order pipeline with fetch, dispatch, execute, and retire -/// stages. -/// -//===----------------------------------------------------------------------===// - -#include "Context.h" -#include "DispatchStage.h" -#include "ExecuteStage.h" -#include "FetchStage.h" -#include "RegisterFile.h" -#include "RetireControlUnit.h" -#include "RetireStage.h" -#include "Scheduler.h" - -namespace mca { - -using namespace llvm; - -std::unique_ptr -Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, - SourceMgr &SrcMgr) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Create the hardware units defining the backend. - auto RCU = llvm::make_unique(SM); - auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); - auto LSU = llvm::make_unique(Opts.LoadQueueSize, Opts.StoreQueueSize, - Opts.AssumeNoAlias); - auto HWS = llvm::make_unique(SM, LSU.get()); - - // Create the pipeline and its stages. 
- auto StagePipeline = llvm::make_unique(); - auto Fetch = llvm::make_unique(IB, SrcMgr); - auto Dispatch = llvm::make_unique( - STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF); - auto Execute = llvm::make_unique(*HWS); - auto Retire = llvm::make_unique(*RCU, *PRF); - - // Pass the ownership of all the hardware units to this Context. - addHardwareUnit(std::move(RCU)); - addHardwareUnit(std::move(PRF)); - addHardwareUnit(std::move(LSU)); - addHardwareUnit(std::move(HWS)); - - // Build the pipeline. - StagePipeline->appendStage(std::move(Fetch)); - StagePipeline->appendStage(std::move(Dispatch)); - StagePipeline->appendStage(std::move(Execute)); - StagePipeline->appendStage(std::move(Retire)); - return StagePipeline; -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/DispatchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/DispatchStage.h +++ llvm/trunk/tools/llvm-mca/DispatchStage.h @@ -1,95 +0,0 @@ -//===----------------------- DispatchStage.h --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file models the dispatch component of an instruction pipeline. -/// -/// The DispatchStage is responsible for updating instruction dependencies -/// and communicating to the simulated instruction scheduler that an instruction -/// is ready to be scheduled for execution. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H - -#include "HWEventListener.h" -#include "Instruction.h" -#include "RegisterFile.h" -#include "RetireControlUnit.h" -#include "Stage.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace mca { - -// Implements the hardware dispatch logic. -// -// This class is responsible for the dispatch stage, in which instructions are -// dispatched in groups to the Scheduler. An instruction can be dispatched if -// the following conditions are met: -// 1) There are enough entries in the reorder buffer (see class -// RetireControlUnit) to write the opcodes associated with the instruction. -// 2) There are enough physical registers to rename output register operands. -// 3) There are enough entries available in the used buffered resource(s). -// -// The number of micro opcodes that can be dispatched in one cycle is limited by -// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when -// processor resources are not available. Dispatch stall events are counted -// during the entire execution of the code, and displayed by the performance -// report when flag '-dispatch-stats' is specified. -// -// If the number of micro opcodes exceedes DispatchWidth, then the instruction -// is dispatched in multiple cycles. 
-class DispatchStage final : public Stage { - unsigned DispatchWidth; - unsigned AvailableEntries; - unsigned CarryOver; - const llvm::MCSubtargetInfo &STI; - RetireControlUnit &RCU; - RegisterFile &PRF; - - bool checkRCU(const InstRef &IR) const; - bool checkPRF(const InstRef &IR) const; - bool canDispatch(const InstRef &IR) const; - llvm::Error dispatch(InstRef IR); - - void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); - - void notifyInstructionDispatched(const InstRef &IR, - llvm::ArrayRef UsedPhysRegs); - - void collectWrites(llvm::SmallVectorImpl &Vec, - unsigned RegID) const { - return PRF.collectWrites(Vec, RegID); - } - -public: - DispatchStage(const llvm::MCSubtargetInfo &Subtarget, - const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize, - unsigned MaxDispatchWidth, RetireControlUnit &R, - RegisterFile &F) - : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) {} - - bool isAvailable(const InstRef &IR) const override; - - // The dispatch logic internally doesn't buffer instructions. So there is - // never work to do at the beginning of every cycle. - bool hasWorkToComplete() const override { return false; } - llvm::Error cycleStart() override; - llvm::Error execute(InstRef &IR) override; - -#ifndef NDEBUG - void dump() const; -#endif -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/DispatchStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/DispatchStage.cpp +++ llvm/trunk/tools/llvm-mca/DispatchStage.cpp @@ -1,160 +0,0 @@ -//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file models the dispatch component of an instruction pipeline. -/// -/// The DispatchStage is responsible for updating instruction dependencies -/// and communicating to the simulated instruction scheduler that an instruction -/// is ready to be scheduled for execution. -/// -//===----------------------------------------------------------------------===// - -#include "DispatchStage.h" -#include "HWEventListener.h" -#include "Scheduler.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -void DispatchStage::notifyInstructionDispatched(const InstRef &IR, - ArrayRef UsedRegs) { - LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); - notifyEvent(HWInstructionDispatchedEvent(IR, UsedRegs)); -} - -bool DispatchStage::checkPRF(const InstRef &IR) const { - SmallVector RegDefs; - for (const std::unique_ptr &RegDef : - IR.getInstruction()->getDefs()) - RegDefs.emplace_back(RegDef->getRegisterID()); - - const unsigned RegisterMask = PRF.isAvailable(RegDefs); - // A mask with all zeroes means: register files are available. 
- if (RegisterMask) { - notifyEvent( - HWStallEvent(HWStallEvent::RegisterFileStall, IR)); - return false; - } - - return true; -} - -bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; - if (RCU.isAvailable(NumMicroOps)) - return true; - notifyEvent( - HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); - return false; -} - -bool DispatchStage::canDispatch(const InstRef &IR) const { - return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); -} - -void DispatchStage::updateRAWDependencies(ReadState &RS, - const MCSubtargetInfo &STI) { - SmallVector DependentWrites; - - collectWrites(DependentWrites, RS.getRegisterID()); - RS.setDependentWrites(DependentWrites.size()); - // We know that this read depends on all the writes in DependentWrites. - // For each write, check if we have ReadAdvance information, and use it - // to figure out in how many cycles this read becomes available. - const ReadDescriptor &RD = RS.getDescriptor(); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); - for (WriteRef &WR : DependentWrites) { - WriteState &WS = *WR.getWriteState(); - unsigned WriteResID = WS.getWriteResourceID(); - int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); - WS.addUser(&RS, ReadAdvance); - } -} - -llvm::Error DispatchStage::dispatch(InstRef IR) { - assert(!CarryOver && "Cannot dispatch another instruction!"); - Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; - if (NumMicroOps > DispatchWidth) { - assert(AvailableEntries == DispatchWidth); - AvailableEntries = 0; - CarryOver = NumMicroOps - DispatchWidth; - } else { - assert(AvailableEntries >= NumMicroOps); - AvailableEntries -= NumMicroOps; - } - - // A dependency-breaking instruction doesn't have to wait on the register - // input operands, and it is often 
optimized at register renaming stage. - // Update RAW dependencies if this instruction is not a dependency-breaking - // instruction. A dependency-breaking instruction is a zero-latency - // instruction that doesn't consume hardware resources. - // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. - bool IsDependencyBreaking = IS.isDependencyBreaking(); - for (std::unique_ptr &RS : IS.getUses()) - if (RS->isImplicitRead() || !IsDependencyBreaking) - updateRAWDependencies(*RS, STI); - - // By default, a dependency-breaking zero-latency instruction is expected to - // be optimized at register renaming stage. That means, no physical register - // is allocated to the instruction. - bool ShouldAllocateRegisters = - !(Desc.isZeroLatency() && IsDependencyBreaking); - SmallVector RegisterFiles(PRF.getNumRegisterFiles()); - for (std::unique_ptr &WS : IS.getDefs()) { - PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles, - ShouldAllocateRegisters); - } - - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); - - // Notify listeners of the "instruction dispatched" event, - // and move IR to the next stage. - notifyInstructionDispatched(IR, RegisterFiles); - return moveToTheNextStage(IR); -} - -llvm::Error DispatchStage::cycleStart() { - AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; - CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; - return llvm::ErrorSuccess(); -} - -bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); - if (Required > AvailableEntries) - return false; - // The dispatch logic doesn't internally buffer instructions. 
It only accepts - // instructions that can be successfully moved to the next stage during this - // same cycle. - return canDispatch(IR); -} - -llvm::Error DispatchStage::execute(InstRef &IR) { - assert(canDispatch(IR) && "Cannot dispatch another instruction!"); - return dispatch(IR); -} - -#ifndef NDEBUG -void DispatchStage::dump() const { - PRF.dump(); - RCU.dump(); -} -#endif -} // namespace mca Index: llvm/trunk/tools/llvm-mca/ExecuteStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/ExecuteStage.h +++ llvm/trunk/tools/llvm-mca/ExecuteStage.h @@ -1,78 +0,0 @@ -//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the execution stage of a default instruction pipeline. -/// -/// The ExecuteStage is responsible for managing the hardware scheduler -/// and issuing notifications that an instruction has been executed. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H - -#include "Instruction.h" -#include "Scheduler.h" -#include "Stage.h" -#include "llvm/ADT/ArrayRef.h" - -namespace mca { - -class ExecuteStage final : public Stage { - Scheduler &HWS; - - llvm::Error issueInstruction(InstRef &IR); - - // Called at the beginning of each cycle to issue already dispatched - // instructions to the underlying pipelines. 
- llvm::Error issueReadyInstructions(); - - ExecuteStage(const ExecuteStage &Other) = delete; - ExecuteStage &operator=(const ExecuteStage &Other) = delete; - -public: - ExecuteStage(Scheduler &S) : Stage(), HWS(S) {} - - // This stage works under the assumption that the Pipeline will eventually - // execute a retire stage. We don't need to check if pipelines and/or - // schedulers have instructions to process, because those instructions are - // also tracked by the retire control unit. That means, - // RetireControlUnit::hasWorkToComplete() is responsible for checking if there - // are still instructions in-flight in the out-of-order backend. - bool hasWorkToComplete() const override { return false; } - bool isAvailable(const InstRef &IR) const override; - - // Notifies the scheduler that a new cycle just started. - // - // This method notifies the scheduler that a new cycle started. - // This method is also responsible for notifying listeners about instructions - // state changes, and processor resources freed by the scheduler. - // Instructions that transitioned to the 'Executed' state are automatically - // moved to the next stage (i.e. RetireStage). - llvm::Error cycleStart() override; - llvm::Error execute(InstRef &IR) override; - - void - notifyInstructionIssued(const InstRef &IR, - llvm::ArrayRef> Used); - void notifyInstructionExecuted(const InstRef &IR); - void notifyInstructionReady(const InstRef &IR); - void notifyResourceAvailable(const ResourceRef &RR); - - // Notify listeners that buffered resources were consumed. - void notifyReservedBuffers(llvm::ArrayRef Buffers); - - // Notify listeners that buffered resources were freed. 
- void notifyReleasedBuffers(llvm::ArrayRef Buffers); -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H Index: llvm/trunk/tools/llvm-mca/ExecuteStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/ExecuteStage.cpp +++ llvm/trunk/tools/llvm-mca/ExecuteStage.cpp @@ -1,195 +0,0 @@ -//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the execution stage of an instruction pipeline. -/// -/// The ExecuteStage is responsible for managing the hardware scheduler -/// and issuing notifications that an instruction has been executed. -/// -//===----------------------------------------------------------------------===// - -#include "ExecuteStage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -using namespace llvm; - -HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { - switch (Status) { - case Scheduler::SC_LOAD_QUEUE_FULL: - return HWStallEvent::LoadQueueFull; - case Scheduler::SC_STORE_QUEUE_FULL: - return HWStallEvent::StoreQueueFull; - case Scheduler::SC_BUFFERS_FULL: - return HWStallEvent::SchedulerQueueFull; - case Scheduler::SC_DISPATCH_GROUP_STALL: - return HWStallEvent::DispatchGroupStall; - case Scheduler::SC_AVAILABLE: - return HWStallEvent::Invalid; - } - - llvm_unreachable("Don't know how to process this StallKind!"); -} - -bool ExecuteStage::isAvailable(const InstRef &IR) const { - if (Scheduler::Status S = HWS.isAvailable(IR)) { - HWStallEvent::GenericEventType ET = toHWStallEventType(S); - notifyEvent(HWStallEvent(ET, IR)); - return false; - } - - return true; -} - -Error 
ExecuteStage::issueInstruction(InstRef &IR) { - SmallVector, 4> Used; - SmallVector Ready; - HWS.issueInstruction(IR, Used, Ready); - - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - notifyReleasedBuffers(Desc.Buffers); - notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { - notifyInstructionExecuted(IR); - //FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &I : Ready) - notifyInstructionReady(I); - return ErrorSuccess(); -} - -Error ExecuteStage::issueReadyInstructions() { - InstRef IR = HWS.select(); - while (IR.isValid()) { - if (Error Err = issueInstruction(IR)) - return Err; - - // Select the next instruction to issue. - IR = HWS.select(); - } - - return ErrorSuccess(); -} - -Error ExecuteStage::cycleStart() { - llvm::SmallVector Freed; - llvm::SmallVector Executed; - llvm::SmallVector Ready; - - HWS.cycleEvent(Freed, Executed, Ready); - - for (const ResourceRef &RR : Freed) - notifyResourceAvailable(RR); - - for (InstRef &IR : Executed) { - notifyInstructionExecuted(IR); - //FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &IR : Ready) - notifyInstructionReady(IR); - - return issueReadyInstructions(); -} - -// Schedule the instruction for execution on the hardware. -Error ExecuteStage::execute(InstRef &IR) { - assert(isAvailable(IR) && "Scheduler is not available!"); - -#ifndef NDEBUG - // Ensure that the HWS has not stored this instruction in its queues. - HWS.sanityCheck(IR); -#endif - // Reserve a slot in each buffered resource. Also, mark units with - // BufferSize=0 as reserved. Resources with a buffer size of zero will only - // be released after MCIS is issued, and all the ResourceCycles for those - // units have been consumed. 
- const InstrDesc &Desc = IR.getInstruction()->getDesc(); - HWS.dispatch(IR); - notifyReservedBuffers(Desc.Buffers); - if (!HWS.isReady(IR)) - return ErrorSuccess(); - - // If we did not return early, then the scheduler is ready for execution. - notifyInstructionReady(IR); - - // If we cannot issue immediately, the HWS will add IR to its ready queue for - // execution later, so we must return early here. - if (!HWS.mustIssueImmediately(IR)) - return ErrorSuccess(); - - // Issue IR to the underlying pipelines. - return issueInstruction(IR); -} - -void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) { - LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); - notifyEvent( - HWInstructionEvent(HWInstructionEvent::Executed, IR)); -} - -void ExecuteStage::notifyInstructionReady(const InstRef &IR) { - LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); - notifyEvent( - HWInstructionEvent(HWInstructionEvent::Ready, IR)); -} - -void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) { - LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' - << RR.second << "]\n"); - for (HWEventListener *Listener : getListeners()) - Listener->onResourceAvailable(RR); -} - -void ExecuteStage::notifyInstructionIssued( - const InstRef &IR, ArrayRef> Used) { - LLVM_DEBUG({ - dbgs() << "[E] Instruction Issued: #" << IR << '\n'; - for (const std::pair &Resource : Used) { - dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' 
- << Resource.first.second << "], "; - dbgs() << "cycles: " << Resource.second << '\n'; - } - }); - notifyEvent(HWInstructionIssuedEvent(IR, Used)); -} - -void ExecuteStage::notifyReservedBuffers(ArrayRef Buffers) { - if (Buffers.empty()) - return; - - SmallVector BufferIDs(Buffers.begin(), Buffers.end()); - std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); - for (HWEventListener *Listener : getListeners()) - Listener->onReservedBuffers(BufferIDs); -} - -void ExecuteStage::notifyReleasedBuffers(ArrayRef Buffers) { - if (Buffers.empty()) - return; - - SmallVector BufferIDs(Buffers.begin(), Buffers.end()); - std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); - for (HWEventListener *Listener : getListeners()) - Listener->onReleasedBuffers(BufferIDs); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/FetchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/FetchStage.h +++ llvm/trunk/tools/llvm-mca/FetchStage.h @@ -1,52 +0,0 @@ -//===---------------------- FetchStage.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the Fetch stage of an instruction pipeline. Its sole -/// purpose in life is to produce instructions for the rest of the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H - -#include "InstrBuilder.h" -#include "SourceMgr.h" -#include "Stage.h" -#include - -namespace mca { - -class FetchStage final : public Stage { - std::unique_ptr CurrentInstruction; - using InstMap = std::map>; - InstMap Instructions; - InstrBuilder &IB; - SourceMgr &SM; - - // Updates the program counter, and sets 'CurrentInstruction'. - llvm::Error getNextInstruction(); - - FetchStage(const FetchStage &Other) = delete; - FetchStage &operator=(const FetchStage &Other) = delete; - -public: - FetchStage(InstrBuilder &IB, SourceMgr &SM) - : CurrentInstruction(), IB(IB), SM(SM) {} - - bool isAvailable(const InstRef &IR) const override; - bool hasWorkToComplete() const override; - llvm::Error execute(InstRef &IR) override; - llvm::Error cycleStart() override; - llvm::Error cycleEnd() override; -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/FetchStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/FetchStage.cpp +++ llvm/trunk/tools/llvm-mca/FetchStage.cpp @@ -1,82 +0,0 @@ -//===---------------------- FetchStage.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the Fetch stage of an instruction pipeline. Its sole -/// purpose in life is to produce instructions for the rest of the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "FetchStage.h" - -namespace mca { - -bool FetchStage::hasWorkToComplete() const { - return CurrentInstruction.get() || SM.hasNext(); -} - -bool FetchStage::isAvailable(const InstRef & /* unused */) const { - if (!CurrentInstruction) - return false; - assert(SM.hasNext() && "Unexpected internal state!"); - const SourceRef SR = SM.peekNext(); - InstRef IR(SR.first, CurrentInstruction.get()); - return checkNextStage(IR); -} - -llvm::Error FetchStage::getNextInstruction() { - assert(!CurrentInstruction && "There is already an instruction to process!"); - if (!SM.hasNext()) - return llvm::ErrorSuccess(); - const SourceRef SR = SM.peekNext(); - llvm::Expected> InstOrErr = - IB.createInstruction(*SR.second); - if (!InstOrErr) - return InstOrErr.takeError(); - CurrentInstruction = std::move(InstOrErr.get()); - return llvm::ErrorSuccess(); -} - -llvm::Error FetchStage::execute(InstRef & /*unused */) { - assert(CurrentInstruction && "There is no instruction to process!"); - const SourceRef SR = SM.peekNext(); - InstRef IR(SR.first, CurrentInstruction.get()); - assert(checkNextStage(IR) && "Invalid fetch!"); - - Instructions[IR.getSourceIndex()] = std::move(CurrentInstruction); - if (llvm::Error Val = moveToTheNextStage(IR)) - return Val; - - SM.updateNext(); - - // Move the program counter. - return getNextInstruction(); -} - -llvm::Error FetchStage::cycleStart() { - if (!CurrentInstruction && SM.hasNext()) - return getNextInstruction(); - return llvm::ErrorSuccess(); -} - -llvm::Error FetchStage::cycleEnd() { - // Find the first instruction which hasn't been retired. - const InstMap::iterator It = - llvm::find_if(Instructions, [](const InstMap::value_type &KeyValuePair) { - return !KeyValuePair.second->isRetired(); - }); - - // Erase instructions up to the first that hasn't been retired. 
- if (It != Instructions.begin()) - Instructions.erase(Instructions.begin(), It); - - return llvm::ErrorSuccess(); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/HWEventListener.h =================================================================== --- llvm/trunk/tools/llvm-mca/HWEventListener.h +++ llvm/trunk/tools/llvm-mca/HWEventListener.h @@ -1,141 +0,0 @@ -//===----------------------- HWEventListener.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the main interface for hardware event listeners. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H -#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H - -#include "Instruction.h" -#include "llvm/ADT/ArrayRef.h" -#include - -namespace mca { - -// An HWInstructionEvent represents state changes of instructions that -// listeners might be interested in. Listeners can choose to ignore any event -// they are not interested in. -class HWInstructionEvent { -public: - // This is the list of event types that are shared by all targets, that - // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent, - // ...) and generic Views can manipulate. - // Subtargets are free to define additional event types, that are goin to be - // handled by generic components as opaque values, but can still be - // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage, - // DispatchStage, ...) and interpreted by subtarget-specific EventListener - // implementations. - enum GenericEventType { - Invalid = 0, - // Events generated by the Retire Control Unit. - Retired, - // Events generated by the Scheduler. 
- Ready, - Issued, - Executed, - // Events generated by the Dispatch logic. - Dispatched, - - LastGenericEventType, - }; - - HWInstructionEvent(unsigned type, const InstRef &Inst) - : Type(type), IR(Inst) {} - - // The event type. The exact meaning depends on the subtarget. - const unsigned Type; - - // The instruction this event was generated for. - const InstRef &IR; -}; - -class HWInstructionIssuedEvent : public HWInstructionEvent { -public: - using ResourceRef = std::pair; - HWInstructionIssuedEvent(const InstRef &IR, - llvm::ArrayRef> UR) - : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {} - - llvm::ArrayRef> UsedResources; -}; - -class HWInstructionDispatchedEvent : public HWInstructionEvent { -public: - HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef Regs) - : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), - UsedPhysRegs(Regs) {} - // Number of physical register allocated for this instruction. There is one - // entry per register file. - llvm::ArrayRef UsedPhysRegs; -}; - -class HWInstructionRetiredEvent : public HWInstructionEvent { -public: - HWInstructionRetiredEvent(const InstRef &IR, llvm::ArrayRef Regs) - : HWInstructionEvent(HWInstructionEvent::Retired, IR), - FreedPhysRegs(Regs) {} - // Number of register writes that have been architecturally committed. There - // is one entry per register file. - llvm::ArrayRef FreedPhysRegs; -}; - -// A HWStallEvent represents a pipeline stall caused by the lack of hardware -// resources. -class HWStallEvent { -public: - enum GenericEventType { - Invalid = 0, - // Generic stall events generated by the DispatchStage. - RegisterFileStall, - RetireControlUnitStall, - // Generic stall events generated by the Scheduler. - DispatchGroupStall, - SchedulerQueueFull, - LoadQueueFull, - StoreQueueFull, - LastGenericEvent - }; - - HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {} - - // The exact meaning of the stall event type depends on the subtarget. 
- const unsigned Type; - - // The instruction this event was generated for. - const InstRef &IR; -}; - -class HWEventListener { -public: - // Generic events generated by the pipeline. - virtual void onCycleBegin() {} - virtual void onCycleEnd() {} - - virtual void onEvent(const HWInstructionEvent &Event) {} - virtual void onEvent(const HWStallEvent &Event) {} - - using ResourceRef = std::pair; - virtual void onResourceAvailable(const ResourceRef &RRef) {} - - // Events generated by the Scheduler when buffered resources are - // consumed/freed. - virtual void onReservedBuffers(llvm::ArrayRef Buffers) {} - virtual void onReleasedBuffers(llvm::ArrayRef Buffers) {} - - virtual ~HWEventListener() {} - -private: - virtual void anchor(); -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/HWEventListener.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/HWEventListener.cpp +++ llvm/trunk/tools/llvm-mca/HWEventListener.cpp @@ -1,21 +0,0 @@ -//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a vtable anchor for class HWEventListener. -/// -//===----------------------------------------------------------------------===// - -#include "HWEventListener.h" - -namespace mca { - -// Anchor the vtable here. 
-void HWEventListener::anchor() {} -} // namespace mca Index: llvm/trunk/tools/llvm-mca/HardwareUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/HardwareUnit.h +++ llvm/trunk/tools/llvm-mca/HardwareUnit.h @@ -1,31 +0,0 @@ -//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a base class for describing a simulated hardware -/// unit. These units are used to construct a simulated backend. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H -#define LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H - -namespace mca { - -class HardwareUnit { - HardwareUnit(const HardwareUnit &H) = delete; - HardwareUnit &operator=(const HardwareUnit &H) = delete; - -public: - HardwareUnit() = default; - virtual ~HardwareUnit(); -}; - -} // namespace mca -#endif // LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H Index: llvm/trunk/tools/llvm-mca/HardwareUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/HardwareUnit.cpp +++ llvm/trunk/tools/llvm-mca/HardwareUnit.cpp @@ -1,23 +0,0 @@ -//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the anchor for the base class that describes -/// simulated hardware units. 
-/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnit.h" - -namespace mca { - -// Pin the vtable with this method. -HardwareUnit::~HardwareUnit() = default; - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/InstrBuilder.h =================================================================== --- llvm/trunk/tools/llvm-mca/InstrBuilder.h +++ llvm/trunk/tools/llvm-mca/InstrBuilder.h @@ -1,90 +0,0 @@ -//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A builder class for instructions that are statically analyzed by llvm-mca. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H -#define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H - -#include "Instruction.h" -#include "Support.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Error.h" - -namespace mca { - -class DispatchUnit; - -/// A builder class that knows how to construct Instruction objects. -/// -/// Every llvm-mca Instruction is described by an object of class InstrDesc. -/// An InstrDesc describes which registers are read/written by the instruction, -/// as well as the instruction latency and hardware resources consumed. -/// -/// This class is used by the tool to construct Instructions and instruction -/// descriptors (i.e. InstrDesc objects). -/// Information from the machine scheduling model is used to identify processor -/// resources that are consumed by an instruction. 
-class InstrBuilder { - const llvm::MCSubtargetInfo &STI; - const llvm::MCInstrInfo &MCII; - const llvm::MCRegisterInfo &MRI; - const llvm::MCInstrAnalysis &MCIA; - llvm::MCInstPrinter &MCIP; - llvm::SmallVector ProcResourceMasks; - - llvm::DenseMap> Descriptors; - llvm::DenseMap> - VariantDescriptors; - - llvm::Expected - createInstrDescImpl(const llvm::MCInst &MCI); - llvm::Expected - getOrCreateInstrDesc(const llvm::MCInst &MCI); - - InstrBuilder(const InstrBuilder &) = delete; - InstrBuilder &operator=(const InstrBuilder &) = delete; - - llvm::Error populateWrites(InstrDesc &ID, const llvm::MCInst &MCI, - unsigned SchedClassID); - llvm::Error populateReads(InstrDesc &ID, const llvm::MCInst &MCI, - unsigned SchedClassID); - -public: - InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, - const llvm::MCRegisterInfo &mri, - const llvm::MCInstrAnalysis &mcia, llvm::MCInstPrinter &mcip) - : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), MCIP(mcip), - ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) { - computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); - } - - // Returns an array of processor resource masks. - // Masks are computed by function mca::computeProcResourceMasks. see - // Support.h for a description of how masks are computed and how masks can be - // used to solve set membership problems. 
- llvm::ArrayRef getProcResourceMasks() const { - return ProcResourceMasks; - } - - void clear() { VariantDescriptors.shrink_and_clear(); } - - llvm::Expected> - createInstruction(const llvm::MCInst &MCI); -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/InstrBuilder.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/InstrBuilder.cpp +++ llvm/trunk/tools/llvm-mca/InstrBuilder.cpp @@ -1,485 +0,0 @@ -//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the InstrBuilder interface. -/// -//===----------------------------------------------------------------------===// - -#include "InstrBuilder.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -using namespace llvm; - -static void initializeUsedResources(InstrDesc &ID, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI, - ArrayRef ProcResourceMasks) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Populate resources consumed. - using ResourcePlusCycles = std::pair; - std::vector Worklist; - - // Track cycles contributed by resources that are in a "Super" relationship. - // This is required if we want to correctly match the behavior of method - // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set - // of "consumed" processor resources and resource cycles, the logic in - // ExpandProcResource() doesn't update the number of resource cycles - // contributed by a "Super" resource to a group. 
- // We need to take this into account when we find that a processor resource is - // part of a group, and it is also used as the "Super" of other resources. - // This map stores the number of cycles contributed by sub-resources that are - // part of a "Super" resource. The key value is the "Super" resource mask ID. - DenseMap SuperResources; - - for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { - const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; - const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); - uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; - if (PR.BufferSize != -1) - ID.Buffers.push_back(Mask); - CycleSegment RCy(0, PRE->Cycles, false); - Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); - if (PR.SuperIdx) { - uint64_t Super = ProcResourceMasks[PR.SuperIdx]; - SuperResources[Super] += PRE->Cycles; - } - } - - // Sort elements by mask popcount, so that we prioritize resource units over - // resource groups, and smaller groups over larger groups. - llvm::sort(Worklist.begin(), Worklist.end(), - [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { - unsigned popcntA = countPopulation(A.first); - unsigned popcntB = countPopulation(B.first); - if (popcntA < popcntB) - return true; - if (popcntA > popcntB) - return false; - return A.first < B.first; - }); - - uint64_t UsedResourceUnits = 0; - - // Remove cycles contributed by smaller resources. - for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { - ResourcePlusCycles &A = Worklist[I]; - if (!A.second.size()) { - A.second.NumUnits = 0; - A.second.setReserved(); - ID.Resources.emplace_back(A); - continue; - } - - ID.Resources.emplace_back(A); - uint64_t NormalizedMask = A.first; - if (countPopulation(A.first) == 1) { - UsedResourceUnits |= A.first; - } else { - // Remove the leading 1 from the resource group mask. 
- NormalizedMask ^= PowerOf2Floor(NormalizedMask); - } - - for (unsigned J = I + 1; J < E; ++J) { - ResourcePlusCycles &B = Worklist[J]; - if ((NormalizedMask & B.first) == NormalizedMask) { - B.second.CS.Subtract(A.second.size() - SuperResources[A.first]); - if (countPopulation(B.first) > 1) - B.second.NumUnits++; - } - } - } - - // A SchedWrite may specify a number of cycles in which a resource group - // is reserved. For example (on target x86; cpu Haswell): - // - // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { - // let ResourceCycles = [2, 2, 3]; - // } - // - // This means: - // Resource units HWPort0 and HWPort1 are both used for 2cy. - // Resource group HWPort01 is the union of HWPort0 and HWPort1. - // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 - // will not be usable for 2 entire cycles from instruction issue. - // - // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency - // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an - // extra delay on top of the 2 cycles latency. - // During those extra cycles, HWPort01 is not usable by other instructions. - for (ResourcePlusCycles &RPC : ID.Resources) { - if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { - // Remove the leading 1 from the resource group mask. - uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); - if ((Mask & UsedResourceUnits) == Mask) - RPC.second.setReserved(); - } - } - - LLVM_DEBUG({ - for (const std::pair &R : ID.Resources) - dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << R << '\n'; - }); -} - -static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI) { - if (MCDesc.isCall()) { - // We cannot estimate how long this call will take. - // Artificially set an arbitrarily high latency (100cy). 
- ID.MaxLatency = 100U; - return; - } - - int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); - // If latency is unknown, then conservatively assume a MaxLatency of 100cy. - ID.MaxLatency = Latency < 0 ? 100U : static_cast(Latency); -} - -Error InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - - // These are for now the (strong) assumptions made by this algorithm: - // * The number of explicit and implicit register definitions in a MCInst - // matches the number of explicit and implicit definitions according to - // the opcode descriptor (MCInstrDesc). - // * Register definitions take precedence over register uses in the operands - // list. - // * If an opcode specifies an optional definition, then the optional - // definition is always the last operand in the sequence, and it can be - // set to zero (i.e. "no register"). - // - // These assumptions work quite well for most out-of-order in-tree targets - // like x86. This is mainly because the vast majority of instructions is - // expanded to MCInst using a straightforward lowering logic that preserves - // the ordering of the operands. - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); - unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; - unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; - if (MCDesc.hasOptionalDef()) - TotalDefs++; - ID.Writes.resize(TotalDefs); - // Iterate over the operands list, and skip non-register operands. - // The first NumExplictDefs register operands are expected to be register - // definitions. 
- unsigned CurrentDef = 0; - unsigned i = 0; - for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { - const MCOperand &Op = MCI.getOperand(i); - if (!Op.isReg()) - continue; - - WriteDescriptor &Write = ID.Writes[CurrentDef]; - Write.OpIndex = i; - if (CurrentDef < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - Write.IsOptionalDef = false; - LLVM_DEBUG({ - dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - CurrentDef++; - } - - if (CurrentDef != NumExplicitDefs) { - return make_error( - "error: Expected more register operand definitions.", - inconvertibleErrorCode()); - } - - CurrentDef = 0; - for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { - unsigned Index = NumExplicitDefs + CurrentDef; - WriteDescriptor &Write = ID.Writes[Index]; - Write.OpIndex = ~CurrentDef; - Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; - if (Index < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, Index); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. 
- Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - - Write.IsOptionalDef = false; - assert(Write.RegisterID != 0 && "Expected a valid phys register!"); - LLVM_DEBUG({ - dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex - << ", PhysReg=" << MRI.getName(Write.RegisterID) - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - if (MCDesc.hasOptionalDef()) { - // Always assume that the optional definition is the last operand of the - // MCInst sequence. - const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1); - if (i == MCI.getNumOperands() || !Op.isReg()) - return make_error( - "error: expected a register operand for an optional " - "definition. Instruction has not be correctly analyzed.", - inconvertibleErrorCode()); - - WriteDescriptor &Write = ID.Writes[TotalDefs - 1]; - Write.OpIndex = MCI.getNumOperands() - 1; - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - Write.IsOptionalDef = true; - } - - return ErrorSuccess(); -} - -Error InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - - // Skip explicit definitions. - unsigned i = 0; - for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) { - const MCOperand &Op = MCI.getOperand(i); - if (Op.isReg()) - NumExplicitDefs--; - } - - if (NumExplicitDefs) { - return make_error( - "error: Expected more register operand definitions. 
", - inconvertibleErrorCode()); - } - - unsigned NumExplicitUses = MCI.getNumOperands() - i; - unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); - if (MCDesc.hasOptionalDef()) { - assert(NumExplicitUses); - NumExplicitUses--; - } - unsigned TotalUses = NumExplicitUses + NumImplicitUses; - if (!TotalUses) - return ErrorSuccess(); - - ID.Reads.resize(TotalUses); - for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) { - ReadDescriptor &Read = ID.Reads[CurrentUse]; - Read.OpIndex = i + CurrentUse; - Read.UseIndex = CurrentUse; - Read.SchedClassID = SchedClassID; - LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex - << ", UseIndex=" << Read.UseIndex << '\n'); - } - - for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) { - ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse]; - Read.OpIndex = ~CurrentUse; - Read.UseIndex = NumExplicitUses + CurrentUse; - Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse]; - Read.SchedClassID = SchedClassID; - LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID=" - << MRI.getName(Read.RegisterID) << '\n'); - } - return ErrorSuccess(); -} - -Expected -InstrBuilder::createInstrDescImpl(const MCInst &MCI) { - assert(STI.getSchedModel().hasInstrSchedModel() && - "Itineraries are not yet supported!"); - - // Obtain the instruction descriptor from the opcode. - unsigned short Opcode = MCI.getOpcode(); - const MCInstrDesc &MCDesc = MCII.get(Opcode); - const MCSchedModel &SM = STI.getSchedModel(); - - // Then obtain the scheduling class information from the instruction. - unsigned SchedClassID = MCDesc.getSchedClass(); - unsigned CPUID = SM.getProcessorID(); - - // Try to solve variant scheduling classes. 
- if (SchedClassID) { - while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) - SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); - - if (!SchedClassID) { - return make_error("unable to resolve this variant class.", - inconvertibleErrorCode()); - } - } - - // Check if this instruction is supported. Otherwise, report an error. - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { - std::string ToString; - llvm::raw_string_ostream OS(ToString); - WithColor::error() << "found an unsupported instruction in the input" - << " assembly sequence.\n"; - MCIP.printInst(&MCI, OS, "", STI); - OS.flush(); - WithColor::note() << "instruction: " << ToString << '\n'; - return make_error( - "Don't know how to analyze unsupported instructions", - inconvertibleErrorCode()); - } - - // Create a new empty descriptor. - std::unique_ptr ID = llvm::make_unique(); - ID->NumMicroOps = SCDesc.NumMicroOps; - - if (MCDesc.isCall()) { - // We don't correctly model calls. - WithColor::warning() << "found a call in the input assembly sequence.\n"; - WithColor::note() << "call instructions are not correctly modeled. 
" - << "Assume a latency of 100cy.\n"; - } - - if (MCDesc.isReturn()) { - WithColor::warning() << "found a return instruction in the input" - << " assembly sequence.\n"; - WithColor::note() << "program counter updates are ignored.\n"; - } - - ID->MayLoad = MCDesc.mayLoad(); - ID->MayStore = MCDesc.mayStore(); - ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); - - initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); - computeMaxLatency(*ID, MCDesc, SCDesc, STI); - if (auto Err = populateWrites(*ID, MCI, SchedClassID)) - return std::move(Err); - if (auto Err = populateReads(*ID, MCI, SchedClassID)) - return std::move(Err); - - LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); - LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); - - // Now add the new descriptor. - SchedClassID = MCDesc.getSchedClass(); - if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) { - Descriptors[MCI.getOpcode()] = std::move(ID); - return *Descriptors[MCI.getOpcode()]; - } - - VariantDescriptors[&MCI] = std::move(ID); - return *VariantDescriptors[&MCI]; -} - -Expected -InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { - if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) - return *Descriptors[MCI.getOpcode()]; - - if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) - return *VariantDescriptors[&MCI]; - - return createInstrDescImpl(MCI); -} - -Expected> -InstrBuilder::createInstruction(const MCInst &MCI) { - Expected DescOrErr = getOrCreateInstrDesc(MCI); - if (!DescOrErr) - return DescOrErr.takeError(); - const InstrDesc &D = *DescOrErr; - std::unique_ptr NewIS = llvm::make_unique(D); - - // Initialize Reads first. - for (const ReadDescriptor &RD : D.Reads) { - int RegID = -1; - if (!RD.isImplicitRead()) { - // explicit read. - const MCOperand &Op = MCI.getOperand(RD.OpIndex); - // Skip non-register operands. - if (!Op.isReg()) - continue; - RegID = Op.getReg(); - } else { - // Implicit read. 
- RegID = RD.RegisterID; - } - - // Skip invalid register operands. - if (!RegID) - continue; - - // Okay, this is a register operand. Create a ReadState for it. - assert(RegID > 0 && "Invalid register ID found!"); - NewIS->getUses().emplace_back(llvm::make_unique(RD, RegID)); - } - - // Early exit if there are no writes. - if (D.Writes.empty()) - return std::move(NewIS); - - // Track register writes that implicitly clear the upper portion of the - // underlying super-registers using an APInt. - APInt WriteMask(D.Writes.size(), 0); - - // Now query the MCInstrAnalysis object to obtain information about which - // register writes implicitly clear the upper portion of a super-register. - MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); - - // Check if this is a dependency breaking instruction. - if (MCIA.isDependencyBreaking(STI, MCI)) - NewIS->setDependencyBreaking(); - - // Initialize writes. - unsigned WriteIndex = 0; - for (const WriteDescriptor &WD : D.Writes) { - unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID - : MCI.getOperand(WD.OpIndex).getReg(); - // Check if this is a optional definition that references NoReg. - if (WD.IsOptionalDef && !RegID) { - ++WriteIndex; - continue; - } - - assert(RegID && "Expected a valid register ID!"); - NewIS->getDefs().emplace_back(llvm::make_unique( - WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex])); - ++WriteIndex; - } - - return std::move(NewIS); -} -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Instruction.h =================================================================== --- llvm/trunk/tools/llvm-mca/Instruction.h +++ llvm/trunk/tools/llvm-mca/Instruction.h @@ -1,449 +0,0 @@ -//===--------------------- Instruction.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines abstractions used by the Pipeline to model register reads, -/// register writes and instructions. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H -#define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H - -#include "llvm/Support/MathExtras.h" - -#ifndef NDEBUG -#include "llvm/Support/raw_ostream.h" -#endif - -#include -#include -#include - -namespace mca { - -constexpr int UNKNOWN_CYCLES = -512; - -/// A register write descriptor. -struct WriteDescriptor { - // Operand index. The index is negative for implicit writes only. - // For implicit writes, the actual operand index is computed performing - // a bitwise not of the OpIndex. - int OpIndex; - // Write latency. Number of cycles before write-back stage. - unsigned Latency; - // This field is set to a value different than zero only if this - // is an implicit definition. - unsigned RegisterID; - // Instruction itineraries would set this field to the SchedClass ID. - // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry - // element associated to this write. - // When computing read latencies, this value is matched against the - // "ReadAdvance" information. The hardware backend may implement - // dedicated forwarding paths to quickly propagate write results to dependent - // instructions waiting in the reservation station (effectively bypassing the - // write-back stage). - unsigned SClassOrWriteResourceID; - // True only if this is a write obtained from an optional definition. - // Optional definitions are allowed to reference regID zero (i.e. "no - // register"). - bool IsOptionalDef; - - bool isImplicitWrite() const { return OpIndex < 0; }; -}; - -/// A register read descriptor. -struct ReadDescriptor { - // A MCOperand index. This is used by the Dispatch logic to identify register - // reads. 
Implicit reads have negative indices. The actual operand index of an - // implicit read is the bitwise not of field OpIndex. - int OpIndex; - // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit - // uses always come first in the sequence of uses. - unsigned UseIndex; - // This field is only set if this is an implicit read. - unsigned RegisterID; - // Scheduling Class Index. It is used to query the scheduling model for the - // MCSchedClassDesc object. - unsigned SchedClassID; - - bool isImplicitRead() const { return OpIndex < 0; }; -}; - -class ReadState; - -/// Tracks uses of a register definition (e.g. register write). -/// -/// Each implicit/explicit register write is associated with an instance of -/// this class. A WriteState object tracks the dependent users of a -/// register write. It also tracks how many cycles are left before the write -/// back stage. -class WriteState { - const WriteDescriptor &WD; - // On instruction issue, this field is set equal to the write latency. - // Before instruction issue, this field defaults to -512, a special - // value that represents an "unknown" number of cycles. - int CyclesLeft; - - // Actual register defined by this write. This field is only used - // to speedup queries on the register file. - // For implicit writes, this field always matches the value of - // field RegisterID from WD. - unsigned RegisterID; - - // True if this write implicitly clears the upper portion of RegisterID's - // super-registers. - bool ClearsSuperRegs; - - // This field is set if this is a partial register write, and it has a false - // dependency on any previous write of the same register (or a portion of it). - // DependentWrite must be able to complete before this write completes, so - // that we don't break the WAW, and the two writes can be merged together. - const WriteState *DependentWrite; - - // Number of writes that are in a WAW dependency with this write. 
- unsigned NumWriteUsers; - - // A list of dependent reads. Users is a set of dependent - // reads. A dependent read is added to the set only if CyclesLeft - // is "unknown". As soon as CyclesLeft is 'known', each user in the set - // gets notified with the actual CyclesLeft. - - // The 'second' element of a pair is a "ReadAdvance" number of cycles. - std::set> Users; - -public: - WriteState(const WriteDescriptor &Desc, unsigned RegID, - bool clearsSuperRegs = false) - : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), - ClearsSuperRegs(clearsSuperRegs), DependentWrite(nullptr), - NumWriteUsers(0U) {} - WriteState(const WriteState &Other) = delete; - WriteState &operator=(const WriteState &Other) = delete; - - int getCyclesLeft() const { return CyclesLeft; } - unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; } - unsigned getRegisterID() const { return RegisterID; } - unsigned getLatency() const { return WD.Latency; } - - void addUser(ReadState *Use, int ReadAdvance); - - unsigned getNumUsers() const { return Users.size() + NumWriteUsers; } - bool clearsSuperRegisters() const { return ClearsSuperRegs; } - - const WriteState *getDependentWrite() const { return DependentWrite; } - void setDependentWrite(WriteState *Other) { - DependentWrite = Other; - ++Other->NumWriteUsers; - } - - // On every cycle, update CyclesLeft and notify dependent users. - void cycleEvent(); - void onInstructionIssued(); - -#ifndef NDEBUG - void dump() const; -#endif -}; - -/// Tracks register operand latency in cycles. -/// -/// A read may be dependent on more than one write. This occurs when some -/// writes only partially update the register associated to this read. -class ReadState { - const ReadDescriptor &RD; - // Physical register identified associated to this read. - unsigned RegisterID; - // Number of writes that contribute to the definition of RegisterID. 
- // In the absence of partial register updates, the number of DependentWrites - // cannot be more than one. - unsigned DependentWrites; - // Number of cycles left before RegisterID can be read. This value depends on - // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES. - // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of - // every dependent write is known. - int CyclesLeft; - // This field is updated on every writeStartEvent(). When the number of - // dependent writes (i.e. field DependentWrite) is zero, this value is - // propagated to field CyclesLeft. - unsigned TotalCycles; - // This field is set to true only if there are no dependent writes, and - // there are no `CyclesLeft' to wait. - bool IsReady; - -public: - ReadState(const ReadDescriptor &Desc, unsigned RegID) - : RD(Desc), RegisterID(RegID), DependentWrites(0), - CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {} - ReadState(const ReadState &Other) = delete; - ReadState &operator=(const ReadState &Other) = delete; - - const ReadDescriptor &getDescriptor() const { return RD; } - unsigned getSchedClass() const { return RD.SchedClassID; } - unsigned getRegisterID() const { return RegisterID; } - - bool isReady() const { return IsReady; } - bool isImplicitRead() const { return RD.isImplicitRead(); } - - void cycleEvent(); - void writeStartEvent(unsigned Cycles); - void setDependentWrites(unsigned Writes) { - DependentWrites = Writes; - IsReady = !Writes; - } -}; - -/// A sequence of cycles. -/// -/// This class can be used as a building block to construct ranges of cycles. -class CycleSegment { - unsigned Begin; // Inclusive. - unsigned End; // Exclusive. - bool Reserved; // Resources associated to this segment must be reserved. 
- -public: - CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false) - : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {} - - bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; } - bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; } - bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; } - bool overlaps(const CycleSegment &CS) const { - return !startsAfter(CS) && !endsBefore(CS); - } - bool isExecuting() const { return Begin == 0 && End != 0; } - bool isExecuted() const { return End == 0; } - bool operator<(const CycleSegment &Other) const { - return Begin < Other.Begin; - } - CycleSegment &operator--(void) { - if (Begin) - Begin--; - if (End) - End--; - return *this; - } - - bool isValid() const { return Begin <= End; } - unsigned size() const { return End - Begin; }; - void Subtract(unsigned Cycles) { - assert(End >= Cycles); - End -= Cycles; - } - - unsigned begin() const { return Begin; } - unsigned end() const { return End; } - void setEnd(unsigned NewEnd) { End = NewEnd; } - bool isReserved() const { return Reserved; } - void setReserved() { Reserved = true; } -}; - -/// Helper used by class InstrDesc to describe how hardware resources -/// are used. -/// -/// This class describes how many resource units of a specific resource kind -/// (and how many cycles) are "used" by an instruction. -struct ResourceUsage { - CycleSegment CS; - unsigned NumUnits; - ResourceUsage(CycleSegment Cycles, unsigned Units = 1) - : CS(Cycles), NumUnits(Units) {} - unsigned size() const { return CS.size(); } - bool isReserved() const { return CS.isReserved(); } - void setReserved() { CS.setReserved(); } -}; - -/// An instruction descriptor -struct InstrDesc { - std::vector Writes; // Implicit writes are at the end. - std::vector Reads; // Implicit reads are at the end. 
- - // For every resource used by an instruction of this kind, this vector - // reports the number of "consumed cycles". - std::vector> Resources; - - // A list of buffered resources consumed by this instruction. - std::vector Buffers; - unsigned MaxLatency; - // Number of MicroOps for this instruction. - unsigned NumMicroOps; - - bool MayLoad; - bool MayStore; - bool HasSideEffects; - - // A zero latency instruction doesn't consume any scheduler resources. - bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } -}; - -/// An instruction propagated through the simulated instruction pipeline. -/// -/// This class is used to monitor changes to the internal state of instructions -/// that are sent to the various components of the simulated hardware pipeline. -class Instruction { - const InstrDesc &Desc; - - enum InstrStage { - IS_INVALID, // Instruction in an invalid state. - IS_AVAILABLE, // Instruction dispatched but operands are not ready. - IS_READY, // Instruction dispatched and operands ready. - IS_EXECUTING, // Instruction issued. - IS_EXECUTED, // Instruction executed. Values are written back. - IS_RETIRED // Instruction retired. - }; - - // The current instruction stage. - enum InstrStage Stage; - - // This value defaults to the instruction latency. This instruction is - // considered executed when field CyclesLeft goes to zero. - int CyclesLeft; - - // Retire Unit token ID for this instruction. - unsigned RCUTokenID; - - bool IsDepBreaking; - - using UniqueDef = std::unique_ptr; - using UniqueUse = std::unique_ptr; - using VecDefs = std::vector; - using VecUses = std::vector; - - // Output dependencies. - // One entry per each implicit and explicit register definition. - VecDefs Defs; - - // Input dependencies. - // One entry per each implicit and explicit register use. 
- VecUses Uses; - -public: - Instruction(const InstrDesc &D) - : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0), - IsDepBreaking(false) {} - Instruction(const Instruction &Other) = delete; - Instruction &operator=(const Instruction &Other) = delete; - - VecDefs &getDefs() { return Defs; } - const VecDefs &getDefs() const { return Defs; } - VecUses &getUses() { return Uses; } - const VecUses &getUses() const { return Uses; } - const InstrDesc &getDesc() const { return Desc; } - unsigned getRCUTokenID() const { return RCUTokenID; } - int getCyclesLeft() const { return CyclesLeft; } - - bool hasDependentUsers() const { - return std::any_of(Defs.begin(), Defs.end(), [](const UniqueDef &Def) { - return Def->getNumUsers() > 0; - }); - } - - bool isDependencyBreaking() const { return IsDepBreaking; } - void setDependencyBreaking() { IsDepBreaking = true; } - - unsigned getNumUsers() const { - unsigned NumUsers = 0; - for (const UniqueDef &Def : Defs) - NumUsers += Def->getNumUsers(); - return NumUsers; - } - - // Transition to the dispatch stage, and assign a RCUToken to this - // instruction. The RCUToken is used to track the completion of every - // register write performed by this instruction. - void dispatch(unsigned RCUTokenID); - - // Instruction issued. Transition to the IS_EXECUTING state, and update - // all the definitions. - void execute(); - - // Force a transition from the IS_AVAILABLE state to the IS_READY state if - // input operands are all ready. State transitions normally occur at the - // beginning of a new cycle (see method cycleEvent()). However, the scheduler - // may decide to promote instructions from the wait queue to the ready queue - // as the result of another issue event. This method is called every time the - // instruction might have changed in state. 
- void update(); - - bool isDispatched() const { return Stage == IS_AVAILABLE; } - bool isReady() const { return Stage == IS_READY; } - bool isExecuting() const { return Stage == IS_EXECUTING; } - bool isExecuted() const { return Stage == IS_EXECUTED; } - bool isRetired() const { return Stage == IS_RETIRED; } - - void retire() { - assert(isExecuted() && "Instruction is in an invalid state!"); - Stage = IS_RETIRED; - } - - void cycleEvent(); -}; - -/// An InstRef contains both a SourceMgr index and Instruction pair. The index -/// is used as a unique identifier for the instruction. MCA will make use of -/// this index as a key throughout MCA. -class InstRef : public std::pair { -public: - InstRef() : std::pair(0, nullptr) {} - InstRef(unsigned Index, Instruction *I) - : std::pair(Index, I) {} - - unsigned getSourceIndex() const { return first; } - Instruction *getInstruction() { return second; } - const Instruction *getInstruction() const { return second; } - - /// Returns true if this references a valid instruction. - bool isValid() const { return second != nullptr; } - - /// Invalidate this reference. - void invalidate() { second = nullptr; } - -#ifndef NDEBUG - void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); } -#endif -}; - -#ifndef NDEBUG -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InstRef &IR) { - IR.print(OS); - return OS; -} -#endif - -/// A reference to a register write. -/// -/// This class is mainly used by the register file to describe register -/// mappings. It correlates a register write to the source index of the -/// defining instruction. 
-class WriteRef { - std::pair Data; - static const unsigned INVALID_IID; - -public: - WriteRef() : Data(INVALID_IID, nullptr) {} - WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {} - - unsigned getSourceIndex() const { return Data.first; } - const WriteState *getWriteState() const { return Data.second; } - WriteState *getWriteState() { return Data.second; } - void invalidate() { Data = std::make_pair(INVALID_IID, nullptr); } - - bool isValid() const { - return Data.first != INVALID_IID && Data.second != nullptr; - } - bool operator==(const WriteRef &Other) const { return Data == Other.Data; } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/Instruction.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Instruction.cpp +++ llvm/trunk/tools/llvm-mca/Instruction.cpp @@ -1,177 +0,0 @@ -//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines abstractions used by the Pipeline to model register reads, -// register writes and instructions. -// -//===----------------------------------------------------------------------===// - -#include "Instruction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace mca { - -using namespace llvm; - -void ReadState::writeStartEvent(unsigned Cycles) { - assert(DependentWrites); - assert(CyclesLeft == UNKNOWN_CYCLES); - - // This read may be dependent on more than one write. This typically occurs - // when a definition is the result of multiple writes where at least one - // write does a partial register update. 
- // The HW is forced to do some extra bookkeeping to track of all the - // dependent writes, and implement a merging scheme for the partial writes. - --DependentWrites; - TotalCycles = std::max(TotalCycles, Cycles); - - if (!DependentWrites) { - CyclesLeft = TotalCycles; - IsReady = !CyclesLeft; - } -} - -void WriteState::onInstructionIssued() { - assert(CyclesLeft == UNKNOWN_CYCLES); - // Update the number of cycles left based on the WriteDescriptor info. - CyclesLeft = getLatency(); - - // Now that the time left before write-back is known, notify - // all the users. - for (const std::pair &User : Users) { - ReadState *RS = User.first; - unsigned ReadCycles = std::max(0, CyclesLeft - User.second); - RS->writeStartEvent(ReadCycles); - } -} - -void WriteState::addUser(ReadState *User, int ReadAdvance) { - // If CyclesLeft is different than -1, then we don't need to - // update the list of users. We can just notify the user with - // the actual number of cycles left (which may be zero). - if (CyclesLeft != UNKNOWN_CYCLES) { - unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); - User->writeStartEvent(ReadCycles); - return; - } - - std::pair NewPair(User, ReadAdvance); - Users.insert(NewPair); -} - -void WriteState::cycleEvent() { - // Note: CyclesLeft can be a negative number. It is an error to - // make it an unsigned quantity because users of this write may - // specify a negative ReadAdvance. - if (CyclesLeft != UNKNOWN_CYCLES) - CyclesLeft--; -} - -void ReadState::cycleEvent() { - // Update the total number of cycles. - if (DependentWrites && TotalCycles) { - --TotalCycles; - return; - } - - // Bail out immediately if we don't know how many cycles are left. 
- if (CyclesLeft == UNKNOWN_CYCLES) - return; - - if (CyclesLeft) { - --CyclesLeft; - IsReady = !CyclesLeft; - } -} - -#ifndef NDEBUG -void WriteState::dump() const { - dbgs() << "{ OpIdx=" << WD.OpIndex << ", Lat=" << getLatency() << ", RegID " - << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; -} - -void WriteRef::dump() const { - dbgs() << "IID=" << getSourceIndex() << ' '; - if (isValid()) - getWriteState()->dump(); - else - dbgs() << "(null)"; -} -#endif - -void Instruction::dispatch(unsigned RCUToken) { - assert(Stage == IS_INVALID); - Stage = IS_AVAILABLE; - RCUTokenID = RCUToken; - - // Check if input operands are already available. - update(); -} - -void Instruction::execute() { - assert(Stage == IS_READY); - Stage = IS_EXECUTING; - - // Set the cycles left before the write-back stage. - CyclesLeft = Desc.MaxLatency; - - for (UniqueDef &Def : Defs) - Def->onInstructionIssued(); - - // Transition to the "executed" stage if this is a zero-latency instruction. - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -void Instruction::update() { - assert(isDispatched() && "Unexpected instruction stage found!"); - - if (!llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); })) - return; - - // A partial register write cannot complete before a dependent write. 
- auto IsDefReady = [&](const UniqueDef &Def) { - if (const WriteState *Write = Def->getDependentWrite()) { - int WriteLatency = Write->getCyclesLeft(); - if (WriteLatency == UNKNOWN_CYCLES) - return false; - return static_cast(WriteLatency) < Desc.MaxLatency; - } - return true; - }; - - if (llvm::all_of(Defs, IsDefReady)) - Stage = IS_READY; -} - -void Instruction::cycleEvent() { - if (isReady()) - return; - - if (isDispatched()) { - for (UniqueUse &Use : Uses) - Use->cycleEvent(); - - update(); - return; - } - - assert(isExecuting() && "Instruction not in-flight?"); - assert(CyclesLeft && "Instruction already executed?"); - for (UniqueDef &Def : Defs) - Def->cycleEvent(); - CyclesLeft--; - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -const unsigned WriteRef::INVALID_IID = std::numeric_limits::max(); - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/InstructionTables.h =================================================================== --- llvm/trunk/tools/llvm-mca/InstructionTables.h +++ llvm/trunk/tools/llvm-mca/InstructionTables.h @@ -1,42 +0,0 @@ -//===--------------------- InstructionTables.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements a custom stage to generate instruction tables. 
-/// See the description of command-line flag -instruction-tables in -/// docs/CommandGuide/lvm-mca.rst -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H -#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H - -#include "InstrBuilder.h" -#include "Scheduler.h" -#include "Stage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -class InstructionTables final : public Stage { - const llvm::MCSchedModel &SM; - InstrBuilder &IB; - llvm::SmallVector, 4> UsedResources; - -public: - InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder) - : Stage(), SM(Model), IB(Builder) {} - - bool hasWorkToComplete() const override { return false; } - llvm::Error execute(InstRef &IR) override; -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/InstructionTables.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/InstructionTables.cpp +++ llvm/trunk/tools/llvm-mca/InstructionTables.cpp @@ -1,70 +0,0 @@ -//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the method InstructionTables::execute(). -/// Method execute() prints a theoretical resource pressure distribution based -/// on the information available in the scheduling model, and without running -/// the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "InstructionTables.h" - -namespace mca { - -using namespace llvm; - -Error InstructionTables::execute(InstRef &IR) { - ArrayRef Masks = IB.getProcResourceMasks(); - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - UsedResources.clear(); - - // Identify the resources consumed by this instruction. - for (const std::pair Resource : Desc.Resources) { - // Skip zero-cycle resources (i.e., unused resources). - if (!Resource.second.size()) - continue; - double Cycles = static_cast(Resource.second.size()); - unsigned Index = std::distance( - Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first)); - const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index); - unsigned NumUnits = ProcResource.NumUnits; - if (!ProcResource.SubUnitsIdxBegin) { - // The number of cycles consumed by each unit. - Cycles /= NumUnits; - for (unsigned I = 0, E = NumUnits; I < E; ++I) { - ResourceRef ResourceUnit = std::make_pair(Index, 1U << I); - UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles)); - } - continue; - } - - // This is a group. Obtain the set of resources contained in this - // group. Some of these resources may implement multiple units. - // Uniformly distribute Cycles across all of the units. - for (unsigned I1 = 0; I1 < NumUnits; ++I1) { - unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1]; - const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx); - // Compute the number of cycles consumed by each resource unit. - double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits); - for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) { - ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2); - UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles)); - } - } - } - - // Send a fake instruction issued event to all the views. 
- HWInstructionIssuedEvent Event(IR, UsedResources); - notifyEvent(Event); - return ErrorSuccess(); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/LSUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/LSUnit.h +++ llvm/trunk/tools/llvm-mca/LSUnit.h @@ -1,161 +0,0 @@ -//===------------------------- LSUnit.h --------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A Load/Store unit class that models load/store queues and that implements -/// a simple weak memory consistency model. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H -#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H - -#include "HardwareUnit.h" -#include - -namespace mca { - -class InstRef; -struct InstrDesc; - -/// A Load/Store Unit implementing a load and store queues. -/// -/// This class implements a load queue and a store queue to emulate the -/// out-of-order execution of memory operations. -/// Each load (or store) consumes an entry in the load (or store) queue. -/// -/// Rules are: -/// 1) A younger load is allowed to pass an older load only if there are no -/// stores nor barriers in between the two loads. -/// 2) An younger store is not allowed to pass an older store. -/// 3) A younger store is not allowed to pass an older load. -/// 4) A younger load is allowed to pass an older store only if the load does -/// not alias with the store. -/// -/// This class optimistically assumes that loads don't alias store operations. -/// Under this assumption, younger loads are always allowed to pass older -/// stores (this would only affects rule 4). 
-/// Essentially, this LSUnit doesn't attempt to run any sort alias analysis to -/// predict when loads and stores don't alias with eachother. -/// -/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be -/// set to `false` by the constructor of LSUnit. -/// -/// In the case of write-combining memory, rule 2. could be relaxed to allow -/// reordering of non-aliasing store operations. At the moment, this is not -/// allowed. -/// To put it in another way, there is no option to specify a different memory -/// type for memory operations (example: write-through, write-combining, etc.). -/// Also, there is no way to weaken the memory model, and this unit currently -/// doesn't support write-combining behavior. -/// -/// No assumptions are made on the size of the store buffer. -/// As mentioned before, this class doesn't perform alias analysis. -/// Consequently, LSUnit doesn't know how to identify cases where -/// store-to-load forwarding may occur. -/// -/// LSUnit doesn't attempt to predict whether a load or store hits or misses -/// the L1 cache. To be more specific, LSUnit doesn't know anything about -/// the cache hierarchy and memory types. -/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the -/// scheduling model provides an "optimistic" load-to-use latency (which usually -/// matches the load-to-use latency for when there is a hit in the L1D). -/// -/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor -/// memory-barrier like instructions. -/// LSUnit conservatively assumes that an instruction which `mayLoad` and has -/// `unmodeled side effects` behave like a "soft" load-barrier. That means, it -/// serializes loads without forcing a flush of the load queue. -/// Similarly, instructions that both `mayStore` and have `unmodeled side -/// effects` are treated like store barriers. A full memory -/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side -/// effects. 
This is obviously inaccurate, but this is the best that we can do -/// at the moment. -/// -/// Each load/store barrier consumes one entry in the load/store queue. A -/// load/store barrier enforces ordering of loads/stores: -/// - A younger load cannot pass a load barrier. -/// - A younger store cannot pass a store barrier. -/// -/// A younger load has to wait for the memory load barrier to execute. -/// A load/store barrier is "executed" when it becomes the oldest entry in -/// the load/store queue(s). That also means, all the older loads/stores have -/// already been executed. -class LSUnit : public HardwareUnit { - // Load queue size. - // LQ_Size == 0 means that there are infinite slots in the load queue. - unsigned LQ_Size; - - // Store queue size. - // SQ_Size == 0 means that there are infinite slots in the store queue. - unsigned SQ_Size; - - // If true, loads will never alias with stores. This is the default. - bool NoAlias; - - std::set LoadQueue; - std::set StoreQueue; - - void assignLQSlot(unsigned Index); - void assignSQSlot(unsigned Index); - bool isReadyNoAlias(unsigned Index) const; - - // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is - // conservatively treated as a store barrier. It forces older store to be - // executed before newer stores are issued. - std::set StoreBarriers; - - // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is - // conservatively treated as a load barrier. It forces older loads to execute - // before newer loads are issued. 
- std::set LoadBarriers; - - bool isSQEmpty() const { return StoreQueue.empty(); } - bool isLQEmpty() const { return LoadQueue.empty(); } - bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; } - bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; } - -public: - LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false) - : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {} - -#ifndef NDEBUG - void dump() const; -#endif - - enum Status { - LSU_AVAILABLE = 0, - LSU_LQUEUE_FULL, - LSU_SQUEUE_FULL - }; - - // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve - // IR. It also returns LSU_AVAILABLE if IR is not a memory operation. - Status isAvailable(const InstRef &IR) const; - - // Allocates load/store queue resources for IR. - // - // This method assumes that a previous call to `isAvailable(IR)` returned - // LSU_AVAILABLE, and that IR is a memory operation. - void dispatch(const InstRef &IR); - - // By default, rules are: - // 1. A store may not pass a previous store. - // 2. A load may not pass a previous store unless flag 'NoAlias' is set. - // 3. A load may pass a previous load. - // 4. A store may not pass a previous load (regardless of flag 'NoAlias'). - // 5. A load has to wait until an older load barrier is fully executed. - // 6. A store has to wait until an older store barrier is fully executed. - virtual bool isReady(const InstRef &IR) const; - void onInstructionExecuted(const InstRef &IR); -}; - -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/LSUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/LSUnit.cpp +++ llvm/trunk/tools/llvm-mca/LSUnit.cpp @@ -1,157 +0,0 @@ -//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A Load-Store Unit for the llvm-mca tool. -/// -//===----------------------------------------------------------------------===// - -#include "LSUnit.h" -#include "Instruction.h" - -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -#ifndef NDEBUG -void LSUnit::dump() const { - dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; - dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; - dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n'; - dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; -} -#endif - -void LSUnit::assignLQSlot(unsigned Index) { - assert(!isLQFull()); - assert(LoadQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot \n"); - LoadQueue.insert(Index); -} - -void LSUnit::assignSQSlot(unsigned Index) { - assert(!isSQFull()); - assert(StoreQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot \n"); - StoreQueue.insert(Index); -} - -void LSUnit::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned IsMemBarrier = Desc.HasSideEffects; - assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); - - const unsigned Index = IR.getSourceIndex(); - if (Desc.MayLoad) { - if (IsMemBarrier) - LoadBarriers.insert(Index); - assignLQSlot(Index); - } - - if (Desc.MayStore) { - if (IsMemBarrier) - StoreBarriers.insert(Index); - assignSQSlot(Index); - } -} - -LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.MayLoad && isLQFull()) - return LSUnit::LSU_LQUEUE_FULL; - if (Desc.MayStore && isSQFull()) - return LSUnit::LSU_SQUEUE_FULL; - return LSUnit::LSU_AVAILABLE; -} - -bool LSUnit::isReady(const InstRef &IR) const { - const InstrDesc &Desc = 
IR.getInstruction()->getDesc(); - const unsigned Index = IR.getSourceIndex(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Not a memory operation!"); - assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!"); - assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!"); - - if (IsALoad && !LoadBarriers.empty()) { - unsigned LoadBarrierIndex = *LoadBarriers.begin(); - if (Index > LoadBarrierIndex) - return false; - if (Index == LoadBarrierIndex && Index != *LoadQueue.begin()) - return false; - } - - if (IsAStore && !StoreBarriers.empty()) { - unsigned StoreBarrierIndex = *StoreBarriers.begin(); - if (Index > StoreBarrierIndex) - return false; - if (Index == StoreBarrierIndex && Index != *StoreQueue.begin()) - return false; - } - - if (NoAlias && IsALoad) - return true; - - if (StoreQueue.size()) { - // Check if this memory operation is younger than the older store. - if (Index > *StoreQueue.begin()) - return false; - } - - // Okay, we are older than the oldest store in the queue. - // If there are no pending loads, then we can say for sure that this - // instruction is ready. - if (isLQEmpty()) - return true; - - // Check if there are no older loads. - if (Index <= *LoadQueue.begin()) - return true; - - // There is at least one younger load. 
- return !IsAStore; -} - -void LSUnit::onInstructionExecuted(const InstRef &IR) { - const unsigned Index = IR.getSourceIndex(); - std::set::iterator it = LoadQueue.find(Index); - if (it != LoadQueue.end()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the load queue.\n"); - LoadQueue.erase(it); - } - - it = StoreQueue.find(Index); - if (it != StoreQueue.end()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the store queue.\n"); - StoreQueue.erase(it); - } - - if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of store barriers.\n"); - StoreBarriers.erase(StoreBarriers.begin()); - } - if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of load barriers.\n"); - LoadBarriers.erase(LoadBarriers.begin()); - } -} -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Pipeline.h =================================================================== --- llvm/trunk/tools/llvm-mca/Pipeline.h +++ llvm/trunk/tools/llvm-mca/Pipeline.h @@ -1,76 +0,0 @@ -//===--------------------- Pipeline.h ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements an ordered container of stages that simulate the -/// pipeline of a hardware backend. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINE_H -#define LLVM_TOOLS_LLVM_MCA_PIPELINE_H - -#include "Scheduler.h" -#include "Stage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Error.h" - -namespace mca { - -class HWEventListener; -class HWInstructionEvent; -class HWStallEvent; - -/// A pipeline for a specific subtarget. -/// -/// It emulates an out-of-order execution of instructions. Instructions are -/// fetched from a MCInst sequence managed by an initial 'Fetch' stage. -/// Instructions are firstly fetched, then dispatched to the schedulers, and -/// then executed. -/// -/// This class tracks the lifetime of an instruction from the moment where -/// it gets dispatched to the schedulers, to the moment where it finishes -/// executing and register writes are architecturally committed. -/// In particular, it monitors changes in the state of every instruction -/// in flight. -/// -/// Instructions are executed in a loop of iterations. The number of iterations -/// is defined by the SourceMgr object, which is managed by the initial stage -/// of the instruction pipeline. -/// -/// The Pipeline entry point is method 'run()' which executes cycles in a loop -/// until there are new instructions to dispatch, and not every instruction -/// has been retired. -/// -/// Internally, the Pipeline collects statistical information in the form of -/// histograms. For example, it tracks how the dispatch group size changes -/// over time. -class Pipeline { - Pipeline(const Pipeline &P) = delete; - Pipeline &operator=(const Pipeline &P) = delete; - - /// An ordered list of stages that define this instruction pipeline. 
- llvm::SmallVector, 8> Stages; - std::set Listeners; - unsigned Cycles; - - llvm::Error runCycle(); - bool hasWorkToProcess(); - void notifyCycleBegin(); - void notifyCycleEnd(); - -public: - Pipeline() : Cycles(0) {} - void appendStage(std::unique_ptr S); - llvm::Error run(); - void addEventListener(HWEventListener *Listener); -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_PIPELINE_H Index: llvm/trunk/tools/llvm-mca/Pipeline.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Pipeline.cpp +++ llvm/trunk/tools/llvm-mca/Pipeline.cpp @@ -1,97 +0,0 @@ -//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements an ordered container of stages that simulate the -/// pipeline of a hardware backend. 
-/// -//===----------------------------------------------------------------------===// - -#include "Pipeline.h" -#include "HWEventListener.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/Debug.h" - -namespace mca { - -#define DEBUG_TYPE "llvm-mca" - -using namespace llvm; - -void Pipeline::addEventListener(HWEventListener *Listener) { - if (Listener) - Listeners.insert(Listener); - for (auto &S : Stages) - S->addListener(Listener); -} - -bool Pipeline::hasWorkToProcess() { - return llvm::any_of(Stages, [](const std::unique_ptr &S) { - return S->hasWorkToComplete(); - }); -} - -llvm::Error Pipeline::run() { - assert(!Stages.empty() && "Unexpected empty pipeline found!"); - - while (hasWorkToProcess()) { - notifyCycleBegin(); - if (llvm::Error Err = runCycle()) - return Err; - notifyCycleEnd(); - ++Cycles; - } - return llvm::ErrorSuccess(); -} - -llvm::Error Pipeline::runCycle() { - llvm::Error Err = llvm::ErrorSuccess(); - // Update stages before we start processing new instructions. - for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr &S = *I; - Err = S->cycleStart(); - } - - // Now fetch and execute new instructions. - InstRef IR; - Stage &FirstStage = *Stages[0]; - while (!Err && FirstStage.isAvailable(IR)) - Err = FirstStage.execute(IR); - - // Update stages in preparation for a new cycle. 
- for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr &S = *I; - Err = S->cycleEnd(); - } - - return Err; -} - -void Pipeline::appendStage(std::unique_ptr S) { - assert(S && "Invalid null stage in input!"); - if (!Stages.empty()) { - Stage *Last = Stages.back().get(); - Last->setNextInSequence(S.get()); - } - - Stages.push_back(std::move(S)); -} - -void Pipeline::notifyCycleBegin() { - LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); - for (HWEventListener *Listener : Listeners) - Listener->onCycleBegin(); -} - -void Pipeline::notifyCycleEnd() { - LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); - for (HWEventListener *Listener : Listeners) - Listener->onCycleEnd(); -} -} // namespace mca. Index: llvm/trunk/tools/llvm-mca/RegisterFile.h =================================================================== --- llvm/trunk/tools/llvm-mca/RegisterFile.h +++ llvm/trunk/tools/llvm-mca/RegisterFile.h @@ -1,171 +0,0 @@ -//===--------------------- RegisterFile.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a register mapping file class. This class is responsible -/// for managing hardware register files and the tracking of data dependencies -/// between registers. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H -#define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H - -#include "HardwareUnit.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/Support/Error.h" - -namespace mca { - -class ReadState; -class WriteState; -class WriteRef; - -/// Manages hardware register files, and tracks register definitions for -/// register renaming purposes. -class RegisterFile : public HardwareUnit { - const llvm::MCRegisterInfo &MRI; - - // Each register file is associated with an instance of - // RegisterMappingTracker. - // A RegisterMappingTracker keeps track of the number of physical registers - // which have been dynamically allocated by the simulator. - struct RegisterMappingTracker { - // The total number of physical registers that are available in this - // register file for register renaming purpouses. A value of zero for this - // field means: this register file has an unbounded number of physical - // registers. - const unsigned NumPhysRegs; - // Number of physical registers that are currently in use. - unsigned NumUsedPhysRegs; - - RegisterMappingTracker(unsigned NumPhysRegisters) - : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0) {} - }; - - // A vector of register file descriptors. This set always contains at least - // one entry. Entry at index #0 is reserved. That entry describes a register - // file with an unbounded number of physical registers that "sees" all the - // hardware registers declared by the target (i.e. all the register - // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is - // the target name). - // - // Users can limit the number of physical registers that are available in - // regsiter file #0 specifying command line flag `-register-file-size=`. 
- llvm::SmallVector RegisterFiles; - - // This type is used to propagate information about the owner of a register, - // and the cost of allocating it in the PRF. Register cost is defined as the - // number of physical registers consumed by the PRF to allocate a user - // register. - // - // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical - // registers. So, the cost of allocating a YMM register in BtVer2 is 2. - using IndexPlusCostPairTy = std::pair; - - // Struct RegisterRenamingInfo maps registers to register files. - // There is a RegisterRenamingInfo object for every register defined by - // the target. RegisteRenamingInfo objects are stored into vector - // RegisterMappings, and register IDs can be used to reference them. - struct RegisterRenamingInfo { - IndexPlusCostPairTy IndexPlusCost; - llvm::MCPhysReg RenameAs; - }; - - // RegisterMapping objects are mainly used to track physical register - // definitions. There is a RegisterMapping for every register defined by the - // Target. For each register, a RegisterMapping pair contains a descriptor of - // the last register write (in the form of a WriteRef object), as well as a - // RegisterRenamingInfo to quickly identify owning register files. - // - // This implementation does not allow overlapping register files. The only - // register file that is allowed to overlap with other register files is - // register file #0. If we exclude register #0, every register is "owned" by - // at most one register file. - using RegisterMapping = std::pair; - - // This map contains one entry for each register defined by the target. - std::vector RegisterMappings; - - // This method creates a new register file descriptor. - // The new register file owns all of the registers declared by register - // classes in the 'RegisterClasses' set. - // - // Processor models allow the definition of RegisterFile(s) via tablegen. 
For - // example, this is a tablegen definition for a x86 register file for - // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1 - // physical register). - // - // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]> - // - // Here FPRegisterFile contains all the registers defined by register class - // VR128RegClass and VR256RegClass. FPRegisterFile implements 60 - // registers which can be used for register renaming purpose. - void - addRegisterFile(llvm::ArrayRef RegisterClasses, - unsigned NumPhysRegs); - - // Consumes physical registers in each register file specified by the - // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`. - void allocatePhysRegs(const RegisterRenamingInfo &Entry, - llvm::MutableArrayRef UsedPhysRegs); - - // Releases previously allocated physical registers from the register file(s). - // This method is called from `invalidateRegisterMapping()`. - void freePhysRegs(const RegisterRenamingInfo &Entry, - llvm::MutableArrayRef FreedPhysRegs); - - // Create an instance of RegisterMappingTracker for every register file - // specified by the processor model. - // If no register file is specified, then this method creates a default - // register file with an unbounded number of physical registers. - void initialize(const llvm::MCSchedModel &SM, unsigned NumRegs); - -public: - RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri, - unsigned NumRegs = 0); - - // This method updates the register mappings inserting a new register - // definition. This method is also responsible for updating the number of - // allocated physical registers in each register file modified by the write. - // No physical regiser is allocated when flag ShouldAllocatePhysRegs is set. - void addRegisterWrite(WriteRef Write, - llvm::MutableArrayRef UsedPhysRegs, - bool ShouldAllocatePhysRegs = true); - - // Removes write \param WS from the register mappings. 
- // Physical registers may be released to reflect this update. - void removeRegisterWrite(const WriteState &WS, - llvm::MutableArrayRef FreedPhysRegs, - bool ShouldFreePhysRegs = true); - - // Checks if there are enough physical registers in the register files. - // Returns a "response mask" where each bit represents the response from a - // different register file. A mask of all zeroes means that all register - // files are available. Otherwise, the mask can be used to identify which - // register file was busy. This sematic allows us to classify dispatch - // stalls caused by the lack of register file resources. - // - // Current implementation can simulate up to 32 register files (including the - // special register file at index #0). - unsigned isAvailable(llvm::ArrayRef Regs) const; - void collectWrites(llvm::SmallVectorImpl &Writes, - unsigned RegID) const; - unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H Index: llvm/trunk/tools/llvm-mca/RegisterFile.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/RegisterFile.cpp +++ llvm/trunk/tools/llvm-mca/RegisterFile.cpp @@ -1,350 +0,0 @@ -//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a register mapping file class. This class is responsible -/// for managing hardware register files and the tracking of data dependencies -/// between registers. 
-/// -//===----------------------------------------------------------------------===// - -#include "RegisterFile.h" -#include "Instruction.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -RegisterFile::RegisterFile(const llvm::MCSchedModel &SM, - const llvm::MCRegisterInfo &mri, unsigned NumRegs) - : MRI(mri), RegisterMappings(mri.getNumRegs(), - {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) { - initialize(SM, NumRegs); -} - -void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { - // Create a default register file that "sees" all the machine registers - // declared by the target. The number of physical registers in the default - // register file is set equal to `NumRegs`. A value of zero for `NumRegs` - // means: this register file has an unbounded number of physical registers. - addRegisterFile({} /* all registers */, NumRegs); - if (!SM.hasExtraProcessorInfo()) - return; - - // For each user defined register file, allocate a RegisterMappingTracker - // object. The size of every register file, as well as the mapping between - // register files and register classes is specified via tablegen. - const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); - for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) { - const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; - // Skip invalid register files with zero physical registers. - unsigned Length = RF.NumRegisterCostEntries; - if (!RF.NumPhysRegs) - continue; - // The cost of a register definition is equivalent to the number of - // physical registers that are allocated at register renaming stage. - const MCRegisterCostEntry *FirstElt = - &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; - addRegisterFile(ArrayRef(FirstElt, Length), - RF.NumPhysRegs); - } -} - -void RegisterFile::addRegisterFile(ArrayRef Entries, - unsigned NumPhysRegs) { - // A default register file is always allocated at index #0. 
That register file - // is mainly used to count the total number of mappings created by all - // register files at runtime. Users can limit the number of available physical - // registers in register file #0 through the command line flag - // `-register-file-size`. - unsigned RegisterFileIndex = RegisterFiles.size(); - RegisterFiles.emplace_back(NumPhysRegs); - - // Special case where there is no register class identifier in the set. - // An empty set of register classes means: this register file contains all - // the physical registers specified by the target. - // We optimistically assume that a register can be renamed at the cost of a - // single physical register. The constructor of RegisterFile ensures that - // a RegisterMapping exists for each logical register defined by the Target. - if (Entries.empty()) - return; - - // Now update the cost of individual registers. - for (const MCRegisterCostEntry &RCE : Entries) { - const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); - for (const MCPhysReg Reg : RC) { - RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; - IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; - if (IPC.first && IPC.first != RegisterFileIndex) { - // The only register file that is allowed to overlap is the default - // register file at index #0. The analysis is inaccurate if register - // files overlap. - errs() << "warning: register " << MRI.getName(Reg) - << " defined in multiple register files."; - } - IPC = std::make_pair(RegisterFileIndex, RCE.Cost); - Entry.RenameAs = Reg; - - // Assume the same cost for each sub-register. 
- for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { - RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; - if (!OtherEntry.IndexPlusCost.first && - (!OtherEntry.RenameAs || - MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { - OtherEntry.IndexPlusCost = IPC; - OtherEntry.RenameAs = Reg; - } - } - } - } -} - -void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef UsedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs += Cost; - UsedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. - RegisterFiles[0].NumUsedPhysRegs += Cost; - UsedPhysRegs[0] += Cost; -} - -void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef FreedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs -= Cost; - FreedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. - RegisterFiles[0].NumUsedPhysRegs -= Cost; - FreedPhysRegs[0] += Cost; -} - -void RegisterFile::addRegisterWrite(WriteRef Write, - MutableArrayRef UsedPhysRegs, - bool ShouldAllocatePhysRegs) { - WriteState &WS = *Write.getWriteState(); - unsigned RegID = WS.getRegisterID(); - assert(RegID && "Adding an invalid register definition?"); - - LLVM_DEBUG({ - dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() - << ", " << MRI.getName(RegID) << "]\n"; - }); - - // If RenameAs is equal to RegID, then RegID is subject to register renaming - // and false dependencies on RegID are all eliminated. 
- - // If RenameAs references the invalid register, then we optimistically assume - // that it can be renamed. In the absence of tablegen descriptors for register - // files, RenameAs is always set to the invalid register ID. In all other - // cases, RenameAs must be either equal to RegID, or it must reference a - // super-register of RegID. - - // If RenameAs is a super-register of RegID, then a write to RegID has always - // a false dependency on RenameAs. The only exception is for when the write - // implicitly clears the upper portion of the underlying register. - // If a write clears its super-registers, then it is renamed as `RenameAs`. - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - if (RRI.RenameAs && RRI.RenameAs != RegID) { - RegID = RRI.RenameAs; - WriteRef &OtherWrite = RegisterMappings[RegID].first; - - if (!WS.clearsSuperRegisters()) { - // The processor keeps the definition of `RegID` together with register - // `RenameAs`. Since this partial write is not renamed, no physical - // register is allocated. - ShouldAllocatePhysRegs = false; - - if (OtherWrite.getWriteState() && - (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { - // This partial write has a false dependency on RenameAs. - WS.setDependentWrite(OtherWrite.getWriteState()); - } - } - } - - // Update the mapping for register RegID including its sub-registers. - RegisterMappings[RegID].first = Write; - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I].first = Write; - - // No physical registers are allocated for instructions that are optimized in - // hardware. For example, zero-latency data-dependency breaking instructions - // don't consume physical registers. 
- if (ShouldAllocatePhysRegs) - allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I].first = Write; -} - -void RegisterFile::removeRegisterWrite(const WriteState &WS, - MutableArrayRef FreedPhysRegs, - bool ShouldFreePhysRegs) { - unsigned RegID = WS.getRegisterID(); - - assert(RegID != 0 && "Invalidating an already invalid register?"); - assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && - "Invalidating a write of unknown cycles!"); - assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); - - unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; - if (RenameAs && RenameAs != RegID) { - RegID = RenameAs; - - if (!WS.clearsSuperRegisters()) { - // Keep the definition of `RegID` together with register `RenameAs`. - ShouldFreePhysRegs = false; - } - } - - if (ShouldFreePhysRegs) - freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); - - WriteRef &WR = RegisterMappings[RegID].first; - if (WR.getWriteState() == &WS) - WR.invalidate(); - - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } -} - -void RegisterFile::collectWrites(SmallVectorImpl &Writes, - unsigned RegID) const { - assert(RegID && RegID < RegisterMappings.size()); - LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " - << MRI.getName(RegID) << '\n'); - const WriteRef &WR = RegisterMappings[RegID].first; - if (WR.isValid()) - Writes.push_back(WR); - - // Handle potential partial register updates. 
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - const WriteRef &WR = RegisterMappings[*I].first; - if (WR.isValid()) - Writes.push_back(WR); - } - - // Remove duplicate entries and resize the input vector. - llvm::sort(Writes.begin(), Writes.end(), - [](const WriteRef &Lhs, const WriteRef &Rhs) { - return Lhs.getWriteState() < Rhs.getWriteState(); - }); - auto It = std::unique(Writes.begin(), Writes.end()); - Writes.resize(std::distance(Writes.begin(), It)); - - LLVM_DEBUG({ - for (const WriteRef &WR : Writes) { - const WriteState &WS = *WR.getWriteState(); - dbgs() << "[PRF] Found a dependent use of Register " - << MRI.getName(WS.getRegisterID()) << " (defined by intruction #" - << WR.getSourceIndex() << ")\n"; - } - }); -} - -unsigned RegisterFile::isAvailable(ArrayRef Regs) const { - SmallVector NumPhysRegs(getNumRegisterFiles()); - - // Find how many new mappings must be created for each register file. - for (const unsigned RegID : Regs) { - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; - if (Entry.first) - NumPhysRegs[Entry.first] += Entry.second; - NumPhysRegs[0] += Entry.second; - } - - unsigned Response = 0; - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - unsigned NumRegs = NumPhysRegs[I]; - if (!NumRegs) - continue; - - const RegisterMappingTracker &RMT = RegisterFiles[I]; - if (!RMT.NumPhysRegs) { - // The register file has an unbounded number of microarchitectural - // registers. - continue; - } - - if (RMT.NumPhysRegs < NumRegs) { - // The current register file is too small. This may occur if the number of - // microarchitectural registers in register file #0 was changed by the - // users via flag -reg-file-size. Alternatively, the scheduling model - // specified a too small number of registers for this register file. 
- LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); - - // FIXME: Normalize the instruction register count to match the - // NumPhysRegs value. This is a highly unusual case, and is not expected - // to occur. This normalization is hiding an inconsistency in either the - // scheduling model or in the value that the user might have specified - // for NumPhysRegs. - NumRegs = RMT.NumPhysRegs; - } - - if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) - Response |= (1U << I); - } - - return Response; -} - -#ifndef NDEBUG -void RegisterFile::dump() const { - for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { - const RegisterMapping &RM = RegisterMappings[I]; - if (!RM.first.getWriteState()) - continue; - const RegisterRenamingInfo &RRI = RM.second; - dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first - << ", Cost=" << RRI.IndexPlusCost.second - << ", RenameAs=" << RRI.RenameAs << ", "; - RM.first.dump(); - dbgs() << '\n'; - } - - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - dbgs() << "Register File #" << I; - const RegisterMappingTracker &RMT = RegisterFiles[I]; - dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs - << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; - } -} -#endif - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/ResourceManager.h =================================================================== --- llvm/trunk/tools/llvm-mca/ResourceManager.h +++ llvm/trunk/tools/llvm-mca/ResourceManager.h @@ -1,360 +0,0 @@ -//===--------------------- ResourceManager.h --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// The classes here represent processor resource units and their management -/// strategy. 
These classes are managed by the Scheduler. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H -#define LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H - -#include "Instruction.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -/// Used to notify the internal state of a processor resource. -/// -/// A processor resource is available if it is not reserved, and there are -/// available slots in the buffer. A processor resource is unavailable if it -/// is either reserved, or the associated buffer is full. A processor resource -/// with a buffer size of -1 is always available if it is not reserved. -/// -/// Values of type ResourceStateEvent are returned by method -/// ResourceState::isBufferAvailable(), which is used to query the internal -/// state of a resource. -/// -/// The naming convention for resource state events is: -/// * Event names start with prefix RS_ -/// * Prefix RS_ is followed by a string describing the actual resource state. -enum ResourceStateEvent { - RS_BUFFER_AVAILABLE, - RS_BUFFER_UNAVAILABLE, - RS_RESERVED -}; - -/// Resource allocation strategy used by hardware scheduler resources. -class ResourceStrategy { - ResourceStrategy(const ResourceStrategy &) = delete; - ResourceStrategy &operator=(const ResourceStrategy &) = delete; - -public: - ResourceStrategy() {} - virtual ~ResourceStrategy(); - - /// Selects a processor resource unit from a ReadyMask. - virtual uint64_t select(uint64_t ReadyMask) = 0; - - /// Called by the ResourceManager when a processor resource group, or a - /// processor resource with multiple units has become unavailable. - /// - /// The default strategy uses this information to bias its selection logic. 
- virtual void used(uint64_t ResourceMask) {} -}; - -/// Default resource allocation strategy used by processor resource groups and -/// processor resources with multiple units. -class DefaultResourceStrategy final : public ResourceStrategy { - /// A Mask of resource unit identifiers. - /// - /// There is one bit set for every available resource unit. - /// It defaults to the value of field ResourceSizeMask in ResourceState. - const unsigned ResourceUnitMask; - - /// A simple round-robin selector for processor resource units. - /// Each bit of this mask identifies a sub resource within a group. - /// - /// As an example, lets assume that this is a default policy for a - /// processor resource group composed by the following three units: - /// ResourceA -- 0b001 - /// ResourceB -- 0b010 - /// ResourceC -- 0b100 - /// - /// Field NextInSequenceMask is used to select the next unit from the set of - /// resource units. It defaults to the value of field `ResourceUnitMasks` (in - /// this example, it defaults to mask '0b111'). - /// - /// The round-robin selector would firstly select 'ResourceC', then - /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the - /// corresponding bit in NextInSequenceMask is cleared. For example, if - /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes - /// 0xb011. - /// - /// When NextInSequenceMask becomes zero, it is automatically reset to the - /// default value (i.e. ResourceUnitMask). - uint64_t NextInSequenceMask; - - /// This field is used to track resource units that are used (i.e. selected) - /// by other groups other than the one associated with this strategy object. - /// - /// In LLVM processor resource groups are allowed to partially (or fully) - /// overlap. That means, a same unit may be visible to multiple groups. - /// This field keeps track of uses that have originated from outside of - /// this group. 
The idea is to bias the selection strategy, so that resources - /// that haven't been used by other groups get prioritized. - /// - /// The end goal is to (try to) keep the resource distribution as much uniform - /// as possible. By construction, this mask only tracks one-level of resource - /// usage. Therefore, this strategy is expected to be less accurate when same - /// units are used multiple times by other groups within a single round of - /// select. - /// - /// Note: an LRU selector would have a better accuracy at the cost of being - /// slightly more expensive (mostly in terms of runtime cost). Methods - /// 'select' and 'used', are always in the hot execution path of llvm-mca. - /// Therefore, a slow implementation of 'select' would have a negative impact - /// on the overall performance of the tool. - uint64_t RemovedFromNextInSequence; - - void skipMask(uint64_t Mask); - -public: - DefaultResourceStrategy(uint64_t UnitMask) - : ResourceStrategy(), ResourceUnitMask(UnitMask), - NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {} - virtual ~DefaultResourceStrategy() = default; - - uint64_t select(uint64_t ReadyMask) override; - void used(uint64_t Mask) override; -}; - -/// A processor resource descriptor. -/// -/// There is an instance of this class for every processor resource defined by -/// the machine scheduling model. -/// Objects of class ResourceState dynamically track the usage of processor -/// resource units. -class ResourceState { - /// An index to the MCProcResourceDesc entry in the processor model. - const unsigned ProcResourceDescIndex; - /// A resource mask. This is generated by the tool with the help of - /// function `mca::createProcResourceMasks' (see Support.h). - const uint64_t ResourceMask; - - /// A ProcResource can have multiple units. - /// - /// For processor resource groups, - /// this field default to the value of field `ResourceMask`; the number of - /// bits set is equal to the cardinality of the group. 
For normal (i.e. - /// non-group) resources, the number of bits set in this mask is equivalent - /// to the number of units declared by the processor model (see field - /// 'NumUnits' in 'ProcResourceUnits'). - uint64_t ResourceSizeMask; - - /// A mask of ready units. - uint64_t ReadyMask; - - /// Buffered resources will have this field set to a positive number different - /// than zero. A buffered resource behaves like a reservation station - /// implementing its own buffer for out-of-order execution. - /// - /// A BufferSize of 1 is used by scheduler resources that force in-order - /// execution. - /// - /// A BufferSize of 0 is used to model in-order issue/dispatch resources. - /// Since in-order issue/dispatch resources don't implement buffers, dispatch - /// events coincide with issue events. - /// Also, no other instruction ca be dispatched/issue while this resource is - /// in use. Only when all the "resource cycles" are consumed (after the issue - /// event), a new instruction ca be dispatched. - const int BufferSize; - - /// Available slots in the buffer (zero, if this is not a buffered resource). - unsigned AvailableSlots; - - /// This field is set if this resource is currently reserved. - /// - /// Resources can be reserved for a number of cycles. - /// Instructions can still be dispatched to reserved resources. However, - /// istructions dispatched to a reserved resource cannot be issued to the - /// underlying units (i.e. pipelines) until the resource is released. - bool Unavailable; - - /// Checks for the availability of unit 'SubResMask' in the group. 
- bool isSubResourceReady(uint64_t SubResMask) const { - return ReadyMask & SubResMask; - } - -public: - ResourceState(const llvm::MCProcResourceDesc &Desc, unsigned Index, - uint64_t Mask); - - unsigned getProcResourceID() const { return ProcResourceDescIndex; } - uint64_t getResourceMask() const { return ResourceMask; } - uint64_t getReadyMask() const { return ReadyMask; } - int getBufferSize() const { return BufferSize; } - - bool isBuffered() const { return BufferSize > 0; } - bool isInOrder() const { return BufferSize == 1; } - - /// Returns true if this is an in-order dispatch/issue resource. - bool isADispatchHazard() const { return BufferSize == 0; } - bool isReserved() const { return Unavailable; } - - void setReserved() { Unavailable = true; } - void clearReserved() { Unavailable = false; } - - /// Returs true if this resource is not reserved, and if there are at least - /// `NumUnits` available units. - bool isReady(unsigned NumUnits = 1) const; - - bool isAResourceGroup() const { - return llvm::countPopulation(ResourceMask) > 1; - } - - bool containsResource(uint64_t ID) const { return ResourceMask & ID; } - - void markSubResourceAsUsed(uint64_t ID) { - assert(isSubResourceReady(ID)); - ReadyMask ^= ID; - } - - void releaseSubResource(uint64_t ID) { - assert(!isSubResourceReady(ID)); - ReadyMask ^= ID; - } - - unsigned getNumUnits() const { - return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask); - } - - /// Checks if there is an available slot in the resource buffer. - /// - /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if - /// there is a slot available. - /// - /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it - /// is reserved. - /// - /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. - ResourceStateEvent isBufferAvailable() const; - - /// Reserve a slot in the buffer. 
- void reserveBuffer() { - if (AvailableSlots) - AvailableSlots--; - } - - /// Release a slot in the buffer. - void releaseBuffer() { - if (BufferSize > 0) - AvailableSlots++; - assert(AvailableSlots <= static_cast(BufferSize)); - } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -/// A resource unit identifier. -/// -/// This is used to identify a specific processor resource unit using a pair -/// of indices where the 'first' index is a processor resource mask, and the -/// 'second' index is an index for a "sub-resource" (i.e. unit). -typedef std::pair ResourceRef; - -// First: a MCProcResourceDesc index identifying a buffered resource. -// Second: max number of buffer entries used in this resource. -typedef std::pair BufferUsageEntry; - -/// A resource manager for processor resource units and groups. -/// -/// This class owns all the ResourceState objects, and it is responsible for -/// acting on requests from a Scheduler by updating the internal state of -/// ResourceState objects. -/// This class doesn't know about instruction itineraries and functional units. -/// In future, it can be extended to support itineraries too through the same -/// public interface. -class ResourceManager { - // The resource manager owns all the ResourceState. - std::vector> Resources; - std::vector> Strategies; - - // Keeps track of which resources are busy, and how many cycles are left - // before those become usable again. - llvm::SmallDenseMap BusyResources; - - // A table to map processor resource IDs to processor resource masks. - llvm::SmallVector ProcResID2Mask; - - // Returns the actual resource unit that will be used. - ResourceRef selectPipe(uint64_t ResourceID); - - void use(const ResourceRef &RR); - void release(const ResourceRef &RR); - - unsigned getNumUnits(uint64_t ResourceID) const; - - // Overrides the selection strategy for the processor resource with the given - // mask. 
- void setCustomStrategyImpl(std::unique_ptr S, - uint64_t ResourceMask); - -public: - ResourceManager(const llvm::MCSchedModel &SM); - virtual ~ResourceManager() = default; - - // Overrides the selection strategy for the resource at index ResourceID in - // the MCProcResourceDesc table. - void setCustomStrategy(std::unique_ptr S, - unsigned ResourceID) { - assert(ResourceID < ProcResID2Mask.size() && - "Invalid resource index in input!"); - return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); - } - - // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if - // there are enough available slots in the buffers. - ResourceStateEvent canBeDispatched(llvm::ArrayRef Buffers) const; - - // Return the processor resource identifier associated to this Mask. - unsigned resolveResourceMask(uint64_t Mask) const; - - // Consume a slot in every buffered resource from array 'Buffers'. Resource - // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. - void reserveBuffers(llvm::ArrayRef Buffers); - - // Release buffer entries previously allocated by method reserveBuffers. - void releaseBuffers(llvm::ArrayRef Buffers); - - // Reserve a processor resource. A reserved resource is not available for - // instruction issue until it is released. - void reserveResource(uint64_t ResourceID); - - // Release a previously reserved processor resource. - void releaseResource(uint64_t ResourceID); - - // Returns true if all resources are in-order, and there is at least one - // resource which is a dispatch hazard (BufferSize = 0). 
- bool mustIssueImmediately(const InstrDesc &Desc) const; - - bool canBeIssued(const InstrDesc &Desc) const; - - void issueInstruction( - const InstrDesc &Desc, - llvm::SmallVectorImpl> &Pipes); - - void cycleEvent(llvm::SmallVectorImpl &ResourcesFreed); - -#ifndef NDEBUG - void dump() const { - for (const std::unique_ptr &Resource : Resources) - Resource->dump(); - } -#endif -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H Index: llvm/trunk/tools/llvm-mca/ResourceManager.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/ResourceManager.cpp +++ llvm/trunk/tools/llvm-mca/ResourceManager.cpp @@ -1,309 +0,0 @@ -//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// The classes here represent processor resource units and their management -/// strategy. These classes are managed by the Scheduler. -/// -//===----------------------------------------------------------------------===// - -#include "ResourceManager.h" -#include "Support.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace mca { - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" -ResourceStrategy::~ResourceStrategy() = default; - -void DefaultResourceStrategy::skipMask(uint64_t Mask) { - NextInSequenceMask &= (~Mask); - if (!NextInSequenceMask) { - NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; - RemovedFromNextInSequence = 0; - } -} - -uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { - // This method assumes that ReadyMask cannot be zero. 
- uint64_t CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); - while (!(ReadyMask & CandidateMask)) { - skipMask(CandidateMask); - CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); - } - return CandidateMask; -} - -void DefaultResourceStrategy::used(uint64_t Mask) { - if (Mask > NextInSequenceMask) { - RemovedFromNextInSequence |= Mask; - return; - } - skipMask(Mask); -} - -ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, - uint64_t Mask) - : ProcResourceDescIndex(Index), ResourceMask(Mask), - BufferSize(Desc.BufferSize) { - if (llvm::countPopulation(ResourceMask) > 1) - ResourceSizeMask = ResourceMask ^ llvm::PowerOf2Floor(ResourceMask); - else - ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; - ReadyMask = ResourceSizeMask; - AvailableSlots = BufferSize == -1 ? 0U : static_cast(BufferSize); - Unavailable = false; -} - -bool ResourceState::isReady(unsigned NumUnits) const { - return (!isReserved() || isADispatchHazard()) && - llvm::countPopulation(ReadyMask) >= NumUnits; -} - -ResourceStateEvent ResourceState::isBufferAvailable() const { - if (isADispatchHazard() && isReserved()) - return RS_RESERVED; - if (!isBuffered() || AvailableSlots) - return RS_BUFFER_AVAILABLE; - return RS_BUFFER_UNAVAILABLE; -} - -#ifndef NDEBUG -void ResourceState::dump() const { - dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask - << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize - << ", AvailableSlots=" << AvailableSlots - << ", Reserved=" << Unavailable << '\n'; -} -#endif - -static unsigned getResourceStateIndex(uint64_t Mask) { - return std::numeric_limits::digits - llvm::countLeadingZeros(Mask); -} - -static std::unique_ptr -getStrategyFor(const ResourceState &RS) { - if (RS.isAResourceGroup() || RS.getNumUnits() > 1) - return llvm::make_unique(RS.getReadyMask()); - return std::unique_ptr(nullptr); -} - -ResourceManager::ResourceManager(const MCSchedModel &SM) - : 
ProcResID2Mask(SM.getNumProcResourceKinds()) { - computeProcResourceMasks(SM, ProcResID2Mask); - Resources.resize(SM.getNumProcResourceKinds()); - Strategies.resize(SM.getNumProcResourceKinds()); - - for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - uint64_t Mask = ProcResID2Mask[I]; - unsigned Index = getResourceStateIndex(Mask); - Resources[Index] = - llvm::make_unique(*SM.getProcResource(I), I, Mask); - Strategies[Index] = getStrategyFor(*Resources[Index]); - } -} - -void ResourceManager::setCustomStrategyImpl(std::unique_ptr S, - uint64_t ResourceMask) { - unsigned Index = getResourceStateIndex(ResourceMask); - assert(Index < Resources.size() && "Invalid processor resource index!"); - assert(S && "Unexpected null strategy in input!"); - Strategies[Index] = std::move(S); -} - -unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const { - return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); -} - -unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { - return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); -} - -// Returns the actual resource consumed by this Use. -// First, is the primary resource ID. -// Second, is the specific sub-resource ID. -ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { - unsigned Index = getResourceStateIndex(ResourceID); - ResourceState &RS = *Resources[Index]; - assert(RS.isReady() && "No available units to select!"); - - // Special case where RS is not a group, and it only declares a single - // resource unit. - if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) - return std::make_pair(ResourceID, RS.getReadyMask()); - - uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); - if (RS.isAResourceGroup()) - return selectPipe(SubResourceID); - return std::make_pair(ResourceID, SubResourceID); -} - -void ResourceManager::use(const ResourceRef &RR) { - // Mark the sub-resource referenced by RR as used. 
- ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; - RS.markSubResourceAsUsed(RR.second); - // If there are still available units in RR.first, - // then we are done. - if (RS.isReady()) - return; - - // Notify to other resources that RR.first is no longer available. - for (std::unique_ptr &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) { - unsigned Index = getResourceStateIndex(Current.getResourceMask()); - Current.markSubResourceAsUsed(RR.first); - Strategies[Index]->used(RR.first); - } - } -} - -void ResourceManager::release(const ResourceRef &RR) { - ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; - bool WasFullyUsed = !RS.isReady(); - RS.releaseSubResource(RR.second); - if (!WasFullyUsed) - return; - - for (std::unique_ptr &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) - Current.releaseSubResource(RR.first); - } -} - -ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; -} - -void ResourceManager::reserveBuffers(ArrayRef Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - - if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); - } - } -} - -void ResourceManager::releaseBuffers(ArrayRef Buffers) { - for (const uint64_t R : Buffers) - 
Resources[getResourceStateIndex(R)]->releaseBuffer(); -} - -bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { - return std::all_of(Desc.Resources.begin(), Desc.Resources.end(), - [&](const std::pair &E) { - unsigned NumUnits = - E.second.isReserved() ? 0U : E.second.NumUnits; - unsigned Index = getResourceStateIndex(E.first); - return Resources[Index]->isReady(NumUnits); - }); -} - -// Returns true if all resources are in-order, and there is at least one -// resource which is a dispatch hazard (BufferSize = 0). -bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { - if (!canBeIssued(Desc)) - return false; - bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { - unsigned Index = getResourceStateIndex(BufferMask); - const ResourceState &Resource = *Resources[Index]; - return Resource.isInOrder() || Resource.isADispatchHazard(); - }); - if (!AllInOrderResources) - return false; - - return any_of(Desc.Buffers, [&](uint64_t BufferMask) { - return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); - }); -} - -void ResourceManager::issueInstruction( - const InstrDesc &Desc, - SmallVectorImpl> &Pipes) { - for (const std::pair &R : Desc.Resources) { - const CycleSegment &CS = R.second.CS; - if (!CS.size()) { - releaseResource(R.first); - continue; - } - - assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); - if (!R.second.isReserved()) { - ResourceRef Pipe = selectPipe(R.first); - use(Pipe); - BusyResources[Pipe] += CS.size(); - // Replace the resource mask with a valid processor resource index. - const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; - Pipe.first = RS.getProcResourceID(); - Pipes.emplace_back( - std::pair(Pipe, static_cast(CS.size()))); - } else { - assert((countPopulation(R.first) > 1) && "Expected a group!"); - // Mark this group as reserved. 
- assert(R.second.isReserved()); - reserveResource(R.first); - BusyResources[ResourceRef(R.first, R.first)] += CS.size(); - } - } -} - -void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { - for (std::pair &BR : BusyResources) { - if (BR.second) - BR.second--; - if (!BR.second) { - // Release this resource. - const ResourceRef &RR = BR.first; - - if (countPopulation(RR.first) == 1) - release(RR); - - releaseResource(RR.first); - ResourcesFreed.push_back(RR); - } - } - - for (const ResourceRef &RF : ResourcesFreed) - BusyResources.erase(RF); -} - -void ResourceManager::reserveResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - assert(!Resource.isReserved()); - Resource.setReserved(); -} - -void ResourceManager::releaseResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - Resource.clearReserved(); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/RetireControlUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/RetireControlUnit.h +++ llvm/trunk/tools/llvm-mca/RetireControlUnit.h @@ -1,97 +0,0 @@ -//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file simulates the hardware responsible for retiring instructions. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H -#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H - -#include "HardwareUnit.h" -#include "Instruction.h" -#include "llvm/MC/MCSchedule.h" -#include - -namespace mca { - -/// This class tracks which instructions are in-flight (i.e., dispatched but not -/// retired) in the OoO backend. -// -/// This class checks on every cycle if/which instructions can be retired. -/// Instructions are retired in program order. -/// In the event of an instruction being retired, the pipeline that owns -/// this RetireControlUnit (RCU) gets notified. -/// -/// On instruction retired, register updates are all architecturally -/// committed, and any physicall registers previously allocated for the -/// retired instruction are freed. -struct RetireControlUnit : public HardwareUnit { - // A RUToken is created by the RCU for every instruction dispatched to the - // schedulers. These "tokens" are managed by the RCU in its token Queue. - // - // On every cycle ('cycleEvent'), the RCU iterates through the token queue - // looking for any token with its 'Executed' flag set. If a token has that - // flag set, then the instruction has reached the write-back stage and will - // be retired by the RCU. - // - // 'NumSlots' represents the number of entries consumed by the instruction in - // the reorder buffer. Those entries will become available again once the - // instruction is retired. - // - // Note that the size of the reorder buffer is defined by the scheduling - // model via field 'NumMicroOpBufferSize'. - struct RUToken { - InstRef IR; - unsigned NumSlots; // Slots reserved to this instruction. - bool Executed; // True if the instruction is past the WB stage. - }; - -private: - unsigned NextAvailableSlotIdx; - unsigned CurrentInstructionSlotIdx; - unsigned AvailableSlots; - unsigned MaxRetirePerCycle; // 0 means no limit. 
- std::vector Queue; - -public: - RetireControlUnit(const llvm::MCSchedModel &SM); - - bool isEmpty() const { return AvailableSlots == Queue.size(); } - bool isAvailable(unsigned Quantity = 1) const { - // Some instructions may declare a number of uOps which exceeds the size - // of the reorder buffer. To avoid problems, cap the amount of slots to - // the size of the reorder buffer. - Quantity = std::min(Quantity, static_cast(Queue.size())); - return AvailableSlots >= Quantity; - } - - unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } - - // Reserves a number of slots, and returns a new token. - unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); - - // Return the current token from the RCU's circular token queue. - const RUToken &peekCurrentToken() const; - - // Advance the pointer to the next token in the circular token queue. - void consumeCurrentToken(); - - // Update the RCU token to represent the executed state. - void onInstructionExecuted(unsigned TokenID); - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H Index: llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp +++ llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp @@ -1,87 +0,0 @@ -//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file simulates the hardware responsible for retiring instructions. 
-/// -//===----------------------------------------------------------------------===// - -#include "RetireControlUnit.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM) - : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { - // Check if the scheduling model provides extra information about the machine - // processor. If so, then use that information to set the reorder buffer size - // and the maximum number of instructions retired per cycle. - if (SM.hasExtraProcessorInfo()) { - const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); - if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; - MaxRetirePerCycle = EPI.MaxRetirePerCycle; - } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); -} - -// Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps)); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast(Queue.size())); - // Zero latency instructions may have zero mOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. 
- NormalizedQuantity = std::max(NormalizedQuantity, 1U); - unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; - NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; - return TokenID; -} - -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; -} - -void RetireControlUnit::consumeCurrentToken() { - const RetireControlUnit::RUToken &Current = peekCurrentToken(); - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR.isValid() && "Invalid RUToken in the RCU queue."); - - // Update the slot index to be the next item in the circular queue. - CurrentInstructionSlotIdx += Current.NumSlots; - CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; -} - -void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { - assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR.isValid()); - Queue[TokenID].Executed = true; -} - -#ifndef NDEBUG -void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; -} -#endif - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/RetireStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/RetireStage.h +++ llvm/trunk/tools/llvm-mca/RetireStage.h @@ -1,46 +0,0 @@ -//===---------------------- RetireStage.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the retire stage of a default instruction pipeline. 
-/// The RetireStage represents the process logic that interacts with the -/// simulated RetireControlUnit hardware. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H - -#include "RegisterFile.h" -#include "RetireControlUnit.h" -#include "Stage.h" - -namespace mca { - -class RetireStage final : public Stage { - // Owner will go away when we move listeners/eventing to the stages. - RetireControlUnit &RCU; - RegisterFile &PRF; - - RetireStage(const RetireStage &Other) = delete; - RetireStage &operator=(const RetireStage &Other) = delete; - -public: - RetireStage(RetireControlUnit &R, RegisterFile &F) - : Stage(), RCU(R), PRF(F) {} - - bool hasWorkToComplete() const override { return !RCU.isEmpty(); } - llvm::Error cycleStart() override; - llvm::Error execute(InstRef &IR) override; - void notifyInstructionRetired(const InstRef &IR); -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H Index: llvm/trunk/tools/llvm-mca/RetireStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/RetireStage.cpp +++ llvm/trunk/tools/llvm-mca/RetireStage.cpp @@ -1,62 +0,0 @@ -//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the retire stage of an instruction pipeline. -/// The RetireStage represents the process logic that interacts with the -/// simulated RetireControlUnit hardware. 
-/// -//===----------------------------------------------------------------------===// - -#include "RetireStage.h" -#include "HWEventListener.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -llvm::Error RetireStage::cycleStart() { - if (RCU.isEmpty()) - return llvm::ErrorSuccess(); - - const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); - unsigned NumRetired = 0; - while (!RCU.isEmpty()) { - if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) - break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); - if (!Current.Executed) - break; - RCU.consumeCurrentToken(); - notifyInstructionRetired(Current.IR); - NumRetired++; - } - - return llvm::ErrorSuccess(); -} - -llvm::Error RetireStage::execute(InstRef &IR) { - RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); - return llvm::ErrorSuccess(); -} - -void RetireStage::notifyInstructionRetired(const InstRef &IR) { - LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n'); - llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); - const Instruction &Inst = *IR.getInstruction(); - const InstrDesc &Desc = Inst.getDesc(); - - bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking()); - for (const std::unique_ptr &WS : Inst.getDefs()) - PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs); - notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Scheduler.h =================================================================== --- llvm/trunk/tools/llvm-mca/Scheduler.h +++ llvm/trunk/tools/llvm-mca/Scheduler.h @@ -1,212 +0,0 @@ -//===--------------------- Scheduler.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A scheduler for Processor Resource Units and Processor Resource Groups. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H -#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H - -#include "HardwareUnit.h" -#include "LSUnit.h" -#include "ResourceManager.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -class SchedulerStrategy { -public: - SchedulerStrategy() = default; - virtual ~SchedulerStrategy(); - - /// Returns true if Lhs should take priority over Rhs. - /// - /// This method is used by class Scheduler to select the "best" ready - /// instruction to issue to the underlying pipelines. - virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0; -}; - -/// Default instruction selection strategy used by class Scheduler. -class DefaultSchedulerStrategy : public SchedulerStrategy { - /// This method ranks instructions based on their age, and the number of known - /// users. The lower the rank value, the better. - int computeRank(const InstRef &Lhs) const { - return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers(); - } - -public: - DefaultSchedulerStrategy() = default; - virtual ~DefaultSchedulerStrategy(); - - bool compare(const InstRef &Lhs, const InstRef &Rhs) const override { - int LhsRank = computeRank(Lhs); - int RhsRank = computeRank(Rhs); - - /// Prioritize older instructions over younger instructions to minimize the - /// pressure on the reorder buffer. - if (LhsRank == RhsRank) - return Lhs.getSourceIndex() < Rhs.getSourceIndex(); - return LhsRank < RhsRank; - } -}; - -/// Class Scheduler is responsible for issuing instructions to pipeline -/// resources. -/// -/// Internally, it delegates to a ResourceManager the management of processor -/// resources. 
This class is also responsible for tracking the progress of -/// instructions from the dispatch stage, until the write-back stage. -/// -/// An instruction dispatched to the Scheduler is initially placed into either -/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input -/// operands. -/// -/// An instruction is moved from the WaitSet to the ReadySet when register -/// operands become available, and all memory dependencies are met. -/// Instructions that are moved from the WaitSet to the ReadySet transition -/// in state from 'IS_AVAILABLE' to 'IS_READY'. -/// -/// On every cycle, the Scheduler checks if it can promote instructions from the -/// WaitSet to the ReadySet. -/// -/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued -/// to a (one or more) pipeline(s). This event also causes an instruction state -/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction -/// leaves the IssuedSet when it reaches the write-back stage. -class Scheduler : public HardwareUnit { - LSUnit *LSU; - - // Instruction selection strategy for this Scheduler. - std::unique_ptr Strategy; - - // Hardware resources that are managed by this scheduler. - std::unique_ptr Resources; - - std::vector WaitSet; - std::vector ReadySet; - std::vector IssuedSet; - - /// Verify the given selection strategy and set the Strategy member - /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is - /// used. - void initializeStrategy(std::unique_ptr S); - - /// Issue an instruction without updating the ready queue. - void issueInstructionImpl( - InstRef &IR, - llvm::SmallVectorImpl> &Pipes); - - // Identify instructions that have finished executing, and remove them from - // the IssuedSet. References to executed instructions are added to input - // vector 'Executed'. - void updateIssuedSet(llvm::SmallVectorImpl &Executed); - - // Try to promote instructions from WaitSet to ReadySet. 
- // Add promoted instructions to the 'Ready' vector in input. - void promoteToReadySet(llvm::SmallVectorImpl &Ready); - -public: - Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu) - : LSU(Lsu), Resources(llvm::make_unique(Model)) { - initializeStrategy(nullptr); - } - Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu, - std::unique_ptr SelectStrategy) - : LSU(Lsu), Resources(llvm::make_unique(Model)) { - initializeStrategy(std::move(SelectStrategy)); - } - Scheduler(std::unique_ptr RM, LSUnit *Lsu, - std::unique_ptr SelectStrategy) - : LSU(Lsu), Resources(std::move(RM)) { - initializeStrategy(std::move(SelectStrategy)); - } - - // Stalls generated by the scheduler. - enum Status { - SC_AVAILABLE, - SC_LOAD_QUEUE_FULL, - SC_STORE_QUEUE_FULL, - SC_BUFFERS_FULL, - SC_DISPATCH_GROUP_STALL, - }; - - /// Check if the instruction in 'IR' can be dispatched and returns an answer - /// in the form of a Status value. - /// - /// The DispatchStage is responsible for querying the Scheduler before - /// dispatching new instructions. This routine is used for performing such - /// a query. If the instruction 'IR' can be dispatched, then true is - /// returned, otherwise false is returned with Event set to the stall type. - /// Internally, it also checks if the load/store unit is available. - Status isAvailable(const InstRef &IR) const; - - /// Reserves buffer and LSUnit queue resources that are necessary to issue - /// this instruction. - /// - /// Returns true if instruction IR is ready to be issued to the underlying - /// pipelines. Note that this operation cannot fail; it assumes that a - /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`. - void dispatch(const InstRef &IR); - - /// Returns true if IR is ready to be executed by the underlying pipelines. - /// This method assumes that IR has been previously dispatched. 
- bool isReady(const InstRef &IR) const; - - /// Issue an instruction and populates a vector of used pipeline resources, - /// and a vector of instructions that transitioned to the ready state as a - /// result of this event. - void - issueInstruction(InstRef &IR, - llvm::SmallVectorImpl> &Used, - llvm::SmallVectorImpl &Ready); - - /// Returns true if IR has to be issued immediately, or if IR is a zero - /// latency instruction. - bool mustIssueImmediately(const InstRef &IR) const; - - /// This routine notifies the Scheduler that a new cycle just started. - /// - /// It notifies the underlying ResourceManager that a new cycle just started. - /// Vector `Freed` is populated with resourceRef related to resources that - /// have changed in state, and that are now available to new instructions. - /// Instructions executed are added to vector Executed, while vector Ready is - /// populated with instructions that have become ready in this new cycle. - void cycleEvent(llvm::SmallVectorImpl &Freed, - llvm::SmallVectorImpl &Ready, - llvm::SmallVectorImpl &Executed); - - /// Convert a resource mask into a valid llvm processor resource identifier. - unsigned getResourceID(uint64_t Mask) const { - return Resources->resolveResourceMask(Mask); - } - - /// Select the next instruction to issue from the ReadySet. Returns an invalid - /// instruction reference if there are no ready instructions, or if processor - /// resources are not available. - InstRef select(); - -#ifndef NDEBUG - // Update the ready queues. - void dump() const; - - // This routine performs a sanity check. This routine should only be called - // when we know that 'IR' is not in the scheduler's instruction queues. 
- void sanityCheck(const InstRef &IR) const { - assert(llvm::find(WaitSet, IR) == WaitSet.end()); - assert(llvm::find(ReadySet, IR) == ReadySet.end()); - assert(llvm::find(IssuedSet, IR) == IssuedSet.end()); - } -#endif // !NDEBUG -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H Index: llvm/trunk/tools/llvm-mca/Scheduler.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Scheduler.cpp +++ llvm/trunk/tools/llvm-mca/Scheduler.cpp @@ -1,244 +0,0 @@ -//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// A scheduler for processor resource units and processor resource groups. -// -//===----------------------------------------------------------------------===// - -#include "Scheduler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace mca { - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -void Scheduler::initializeStrategy(std::unique_ptr S) { - // Ensure we have a valid (non-null) strategy object. - Strategy = S ? std::move(S) : llvm::make_unique(); -} - -// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
-SchedulerStrategy::~SchedulerStrategy() = default; -DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; - -#ifndef NDEBUG -void Scheduler::dump() const { - dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; - dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; - dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; - Resources->dump(); -} -#endif - -Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - switch (Resources->canBeDispatched(Desc.Buffers)) { - case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: - return Scheduler::SC_BUFFERS_FULL; - case ResourceStateEvent::RS_RESERVED: - return Scheduler::SC_DISPATCH_GROUP_STALL; - case ResourceStateEvent::RS_BUFFER_AVAILABLE: - break; - } - - // Give lower priority to LSUnit stall events. - switch (LSU->isAvailable(IR)) { - case LSUnit::LSU_LQUEUE_FULL: - return Scheduler::SC_LOAD_QUEUE_FULL; - case LSUnit::LSU_SQUEUE_FULL: - return Scheduler::SC_STORE_QUEUE_FULL; - case LSUnit::LSU_AVAILABLE: - return Scheduler::SC_AVAILABLE; - } - - llvm_unreachable("Don't know how to process this LSU state result!"); -} - -void Scheduler::issueInstructionImpl( - InstRef &IR, - SmallVectorImpl> &UsedResources) { - Instruction *IS = IR.getInstruction(); - const InstrDesc &D = IS->getDesc(); - - // Issue the instruction and collect all the consumed resources - // into a vector. That vector is then used to notify the listener. - Resources->issueInstruction(D, UsedResources); - - // Notify the instruction that it started executing. - // This updates the internal state of each write. - IS->execute(); - - if (IS->isExecuting()) - IssuedSet.emplace_back(IR); - else if (IS->isExecuted()) - LSU->onInstructionExecuted(IR); -} - -// Release the buffered resources and issue the instruction. 
-void Scheduler::issueInstruction( - InstRef &IR, SmallVectorImpl> &UsedResources, - SmallVectorImpl &ReadyInstructions) { - const Instruction &Inst = *IR.getInstruction(); - bool HasDependentUsers = Inst.hasDependentUsers(); - - Resources->releaseBuffers(Inst.getDesc().Buffers); - issueInstructionImpl(IR, UsedResources); - // Instructions that have been issued during this cycle might have unblocked - // other dependent instructions. Dependent instructions may be issued during - // this same cycle if operands have ReadAdvance entries. Promote those - // instructions to the ReadySet and notify the caller that those are ready. - if (HasDependentUsers) - promoteToReadySet(ReadyInstructions); -} - -void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { - // Scan the set of waiting instructions and promote them to the - // ready queue if operands are all ready. - unsigned RemovedElements = 0; - for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR.isValid()) - break; - - // Check if this instruction is now ready. In case, force - // a transition in state using method 'update()'. - Instruction &IS = *IR.getInstruction(); - if (!IS.isReady()) - IS.update(); - - // Check if there are still unsolved data dependencies. - if (!isReady(IR)) { - ++I; - continue; - } - - Ready.emplace_back(IR); - ReadySet.emplace_back(IR); - - IR.invalidate(); - ++RemovedElements; - std::iter_swap(I, E - RemovedElements); - } - - WaitSet.resize(WaitSet.size() - RemovedElements); -} - -InstRef Scheduler::select() { - unsigned QueueIndex = ReadySet.size(); - for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { - const InstRef &IR = ReadySet[I]; - if (QueueIndex == ReadySet.size() || - Strategy->compare(IR, ReadySet[QueueIndex])) { - const InstrDesc &D = IR.getInstruction()->getDesc(); - if (Resources->canBeIssued(D)) - QueueIndex = I; - } - } - - if (QueueIndex == ReadySet.size()) - return InstRef(); - - // We found an instruction to issue. 
- InstRef IR = ReadySet[QueueIndex]; - std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); - ReadySet.pop_back(); - return IR; -} - -void Scheduler::updateIssuedSet(SmallVectorImpl &Executed) { - unsigned RemovedElements = 0; - for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR.isValid()) - break; - Instruction &IS = *IR.getInstruction(); - if (!IS.isExecuted()) { - LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR - << " is still executing.\n"); - ++I; - continue; - } - - // Instruction IR has completed execution. - LSU->onInstructionExecuted(IR); - Executed.emplace_back(IR); - ++RemovedElements; - IR.invalidate(); - std::iter_swap(I, E - RemovedElements); - } - - IssuedSet.resize(IssuedSet.size() - RemovedElements); -} - -void Scheduler::cycleEvent(SmallVectorImpl &Freed, - SmallVectorImpl &Executed, - SmallVectorImpl &Ready) { - // Release consumed resources. - Resources->cycleEvent(Freed); - - // Propagate the cycle event to the 'Issued' and 'Wait' sets. - for (InstRef &IR : IssuedSet) - IR.getInstruction()->cycleEvent(); - - updateIssuedSet(Executed); - - for (InstRef &IR : WaitSet) - IR.getInstruction()->cycleEvent(); - - promoteToReadySet(Ready); -} - -bool Scheduler::mustIssueImmediately(const InstRef &IR) const { - // Instructions that use an in-order dispatch/issue processor resource must be - // issued immediately to the pipeline(s). Any other in-order buffered - // resources (i.e. BufferSize=1) is consumed. - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); -} - -void Scheduler::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - Resources->reserveBuffers(Desc.Buffers); - - // If necessary, reserve queue entries in the load-store unit (LSU). 
- bool IsMemOp = Desc.MayLoad || Desc.MayStore; - if (IsMemOp) - LSU->dispatch(IR); - - if (!isReady(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); - WaitSet.push_back(IR); - return; - } - - // Don't add a zero-latency instruction to the Ready queue. - // A zero-latency instruction doesn't consume any scheduler resources. That is - // because it doesn't need to be executed, and it is often removed at register - // renaming stage. For example, register-register moves are often optimized at - // register renaming stage by simply updating register aliases. On some - // targets, zero-idiom instructions (for example: a xor that clears the value - // of a register) are treated specially, and are often eliminated at register - // renaming stage. - if (!mustIssueImmediately(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); - ReadySet.push_back(IR); - } -} - -bool Scheduler::isReady(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsMemOp = Desc.MayLoad || Desc.MayStore; - return IR.getInstruction()->isReady() && (!IsMemOp || LSU->isReady(IR)); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/SourceMgr.h =================================================================== --- llvm/trunk/tools/llvm-mca/SourceMgr.h +++ llvm/trunk/tools/llvm-mca/SourceMgr.h @@ -1,64 +0,0 @@ -//===--------------------- SourceMgr.h --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements class SourceMgr. Class SourceMgr abstracts the input -/// code sequence (a sequence of MCInst), and assings unique identifiers to -/// every instruction in the sequence. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H -#define LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H - -#include "llvm/MC/MCInst.h" -#include - -namespace mca { - -typedef std::pair SourceRef; - -class SourceMgr { - using InstVec = std::vector>; - const InstVec &Sequence; - unsigned Current; - unsigned Iterations; - static const unsigned DefaultIterations = 100; - -public: - SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations) - : Sequence(MCInstSequence), Current(0), - Iterations(NumIterations ? NumIterations : DefaultIterations) {} - - unsigned getCurrentIteration() const { return Current / Sequence.size(); } - unsigned getNumIterations() const { return Iterations; } - unsigned size() const { return Sequence.size(); } - const InstVec &getSequence() const { return Sequence; } - - bool hasNext() const { return Current < (Iterations * size()); } - void updateNext() { Current++; } - - const SourceRef peekNext() const { - assert(hasNext() && "Already at end of sequence!"); - unsigned Index = getCurrentInstructionIndex(); - return SourceRef(Current, Sequence[Index].get()); - } - - unsigned getCurrentInstructionIndex() const { - return Current % Sequence.size(); - } - - const llvm::MCInst &getMCInstFromIndex(unsigned Index) const { - return *Sequence[Index % size()]; - } - - bool isEmpty() const { return size() == 0; } -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/Stage.h =================================================================== --- llvm/trunk/tools/llvm-mca/Stage.h +++ llvm/trunk/tools/llvm-mca/Stage.h @@ -1,86 +0,0 @@ -//===---------------------- Stage.h -----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a stage. -/// A chain of stages compose an instruction pipeline. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_STAGE_H - -#include "HWEventListener.h" -#include "llvm/Support/Error.h" -#include - -namespace mca { - -class InstRef; - -class Stage { - Stage *NextInSequence; - std::set Listeners; - - Stage(const Stage &Other) = delete; - Stage &operator=(const Stage &Other) = delete; - -protected: - const std::set &getListeners() const { return Listeners; } - -public: - Stage() : NextInSequence(nullptr) {} - virtual ~Stage(); - - /// Returns true if it can execute IR during this cycle. - virtual bool isAvailable(const InstRef &IR) const { return true; } - - /// Returns true if some instructions are still executing this stage. - virtual bool hasWorkToComplete() const = 0; - - /// Called once at the start of each cycle. This can be used as a setup - /// phase to prepare for the executions during the cycle. - virtual llvm::Error cycleStart() { return llvm::ErrorSuccess(); } - - /// Called once at the end of each cycle. - virtual llvm::Error cycleEnd() { return llvm::ErrorSuccess(); } - - /// The primary action that this stage performs on instruction IR. - virtual llvm::Error execute(InstRef &IR) = 0; - - void setNextInSequence(Stage *NextStage) { - assert(!NextInSequence && "This stage already has a NextInSequence!"); - NextInSequence = NextStage; - } - - bool checkNextStage(const InstRef &IR) const { - return NextInSequence && NextInSequence->isAvailable(IR); - } - - /// Called when an instruction is ready to move the next pipeline stage. - /// - /// Stages are responsible for moving instructions to their immediate - /// successor stages. 
- llvm::Error moveToTheNextStage(InstRef &IR) { - assert(checkNextStage(IR) && "Next stage is not ready!"); - return NextInSequence->execute(IR); - } - - /// Add a listener to receive callbacks during the execution of this stage. - void addListener(HWEventListener *Listener); - - /// Notify listeners of a particular hardware event. - template void notifyEvent(const EventT &Event) const { - for (HWEventListener *Listener : Listeners) - Listener->onEvent(Event); - } -}; - -} // namespace mca -#endif // LLVM_TOOLS_LLVM_MCA_STAGE_H Index: llvm/trunk/tools/llvm-mca/Stage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Stage.cpp +++ llvm/trunk/tools/llvm-mca/Stage.cpp @@ -1,27 +0,0 @@ -//===---------------------- Stage.cpp ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a stage. -/// A chain of stages compose an instruction pipeline. -/// -//===----------------------------------------------------------------------===// - -#include "Stage.h" - -namespace mca { - -// Pin the vtable here in the implementation file. -Stage::~Stage() = default; - -void Stage::addListener(HWEventListener *Listener) { - Listeners.insert(Listener); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Support.h =================================================================== --- llvm/trunk/tools/llvm-mca/Support.h +++ llvm/trunk/tools/llvm-mca/Support.h @@ -1,58 +0,0 @@ -//===--------------------- Support.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Helper functions used by various pipeline components. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H -#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -/// Populates vector Masks with processor resource masks. -/// -/// The number of bits set in a mask depends on the processor resource type. -/// Each processor resource mask has at least one bit set. For groups, the -/// number of bits set in the mask is equal to the cardinality of the group plus -/// one. Excluding the most significant bit, the remaining bits in the mask -/// identify processor resources that are part of the group. -/// -/// Example: -/// -/// ResourceA -- Mask: 0b001 -/// ResourceB -- Mask: 0b010 -/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111 -/// -/// ResourceAB is a processor resource group containing ResourceA and ResourceB. -/// Each resource mask uniquely identifies a resource; both ResourceA and -/// ResourceB only have one bit set. -/// ResourceAB is a group; excluding the most significant bit in the mask, the -/// remaining bits identify the composition of the group. -/// -/// Resource masks are used by the ResourceManager to solve set membership -/// problems with simple bit manipulation operations. -void computeProcResourceMasks(const llvm::MCSchedModel &SM, - llvm::SmallVectorImpl &Masks); - -/// Compute the reciprocal block throughput from a set of processor resource -/// cycles. The reciprocal block throughput is computed as the MAX between: -/// - NumMicroOps / DispatchWidth -/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource). 
-double computeBlockRThroughput(const llvm::MCSchedModel &SM, - unsigned DispatchWidth, unsigned NumMicroOps, - llvm::ArrayRef ProcResourceUsage); -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/Support.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Support.cpp +++ llvm/trunk/tools/llvm-mca/Support.cpp @@ -1,79 +0,0 @@ -//===--------------------- Support.cpp --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements a few helper functions used by various pipeline -/// components. -/// -//===----------------------------------------------------------------------===// - -#include "Support.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -using namespace llvm; - -void computeProcResourceMasks(const MCSchedModel &SM, - SmallVectorImpl &Masks) { - unsigned ProcResourceID = 0; - - // Create a unique bitmask for every processor resource unit. - // Skip resource at index 0, since it always references 'InvalidUnit'. - Masks.resize(SM.getNumProcResourceKinds()); - for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { - const MCProcResourceDesc &Desc = *SM.getProcResource(I); - if (Desc.SubUnitsIdxBegin) - continue; - Masks[I] = 1ULL << ProcResourceID; - ProcResourceID++; - } - - // Create a unique bitmask for every processor resource group. 
- for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { - const MCProcResourceDesc &Desc = *SM.getProcResource(I); - if (!Desc.SubUnitsIdxBegin) - continue; - Masks[I] = 1ULL << ProcResourceID; - for (unsigned U = 0; U < Desc.NumUnits; ++U) { - uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]]; - Masks[I] |= OtherMask; - } - ProcResourceID++; - } -} - -double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, - unsigned NumMicroOps, - ArrayRef ProcResourceUsage) { - // The block throughput is bounded from above by the hardware dispatch - // throughput. That is because the DispatchWidth is an upper bound on the - // number of opcodes that can be part of a single dispatch group. - double Max = static_cast(NumMicroOps) / DispatchWidth; - - // The block throughput is also limited by the amount of hardware parallelism. - // The number of available resource units affects the resource pressure - // distribution, as well as how many blocks can be executed every cycle. - for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - unsigned ResourceCycles = ProcResourceUsage[I]; - if (!ResourceCycles) - continue; - - const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); - double Throughput = static_cast(ResourceCycles) / MCDesc.NumUnits; - Max = std::max(Max, Throughput); - } - - // The block reciprocal throughput is computed as the MAX of: - // - (NumMicroOps / DispatchWidth) - // - (NumUnits / ResourceCycles) for every consumed processor resource. 
- return Max; -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/include/Context.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Context.h +++ llvm/trunk/tools/llvm-mca/include/Context.h @@ -0,0 +1,68 @@ +//===---------------------------- Context.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_CONTEXT_H +#define LLVM_TOOLS_LLVM_MCA_CONTEXT_H +#include "HardwareUnits/HardwareUnit.h" +#include "InstrBuilder.h" +#include "Pipeline.h" +#include "SourceMgr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include + +namespace mca { + +/// This is a convenience struct to hold the parameters necessary for creating +/// the pre-built "default" out-of-order pipeline. 
+struct PipelineOptions { + PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS, + bool NoAlias) + : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS), + StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} + unsigned DispatchWidth; + unsigned RegisterFileSize; + unsigned LoadQueueSize; + unsigned StoreQueueSize; + bool AssumeNoAlias; +}; + +class Context { + llvm::SmallVector, 4> Hardware; + const llvm::MCRegisterInfo &MRI; + const llvm::MCSubtargetInfo &STI; + +public: + Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S) + : MRI(R), STI(S) {} + Context(const Context &C) = delete; + Context &operator=(const Context &C) = delete; + + void addHardwareUnit(std::unique_ptr H) { + Hardware.push_back(std::move(H)); + } + + /// Construct a basic pipeline for simulating an out-of-order pipeline. + /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. + std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, + InstrBuilder &IB, + SourceMgr &SrcMgr); +}; + +} // namespace mca +#endif // LLVM_TOOLS_LLVM_MCA_CONTEXT_H Index: llvm/trunk/tools/llvm-mca/include/HWEventListener.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HWEventListener.h +++ llvm/trunk/tools/llvm-mca/include/HWEventListener.h @@ -0,0 +1,141 @@ +//===----------------------- HWEventListener.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the main interface for hardware event listeners. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H +#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H + +#include "Instruction.h" +#include "llvm/ADT/ArrayRef.h" +#include + +namespace mca { + +// An HWInstructionEvent represents state changes of instructions that +// listeners might be interested in. Listeners can choose to ignore any event +// they are not interested in. +class HWInstructionEvent { +public: + // This is the list of event types that are shared by all targets, that + // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent, + // ...) and generic Views can manipulate. + // Subtargets are free to define additional event types, that are goin to be + // handled by generic components as opaque values, but can still be + // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage, + // DispatchStage, ...) and interpreted by subtarget-specific EventListener + // implementations. + enum GenericEventType { + Invalid = 0, + // Events generated by the Retire Control Unit. + Retired, + // Events generated by the Scheduler. + Ready, + Issued, + Executed, + // Events generated by the Dispatch logic. + Dispatched, + + LastGenericEventType, + }; + + HWInstructionEvent(unsigned type, const InstRef &Inst) + : Type(type), IR(Inst) {} + + // The event type. The exact meaning depends on the subtarget. + const unsigned Type; + + // The instruction this event was generated for. 
+ const InstRef &IR; +}; + +class HWInstructionIssuedEvent : public HWInstructionEvent { +public: + using ResourceRef = std::pair; + HWInstructionIssuedEvent(const InstRef &IR, + llvm::ArrayRef> UR) + : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {} + + llvm::ArrayRef> UsedResources; +}; + +class HWInstructionDispatchedEvent : public HWInstructionEvent { +public: + HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef Regs) + : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), + UsedPhysRegs(Regs) {} + // Number of physical register allocated for this instruction. There is one + // entry per register file. + llvm::ArrayRef UsedPhysRegs; +}; + +class HWInstructionRetiredEvent : public HWInstructionEvent { +public: + HWInstructionRetiredEvent(const InstRef &IR, llvm::ArrayRef Regs) + : HWInstructionEvent(HWInstructionEvent::Retired, IR), + FreedPhysRegs(Regs) {} + // Number of register writes that have been architecturally committed. There + // is one entry per register file. + llvm::ArrayRef FreedPhysRegs; +}; + +// A HWStallEvent represents a pipeline stall caused by the lack of hardware +// resources. +class HWStallEvent { +public: + enum GenericEventType { + Invalid = 0, + // Generic stall events generated by the DispatchStage. + RegisterFileStall, + RetireControlUnitStall, + // Generic stall events generated by the Scheduler. + DispatchGroupStall, + SchedulerQueueFull, + LoadQueueFull, + StoreQueueFull, + LastGenericEvent + }; + + HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {} + + // The exact meaning of the stall event type depends on the subtarget. + const unsigned Type; + + // The instruction this event was generated for. + const InstRef &IR; +}; + +class HWEventListener { +public: + // Generic events generated by the pipeline. 
+ virtual void onCycleBegin() {} + virtual void onCycleEnd() {} + + virtual void onEvent(const HWInstructionEvent &Event) {} + virtual void onEvent(const HWStallEvent &Event) {} + + using ResourceRef = std::pair; + virtual void onResourceAvailable(const ResourceRef &RRef) {} + + // Events generated by the Scheduler when buffered resources are + // consumed/freed. + virtual void onReservedBuffers(llvm::ArrayRef Buffers) {} + virtual void onReleasedBuffers(llvm::ArrayRef Buffers) {} + + virtual ~HWEventListener() {} + +private: + virtual void anchor(); +}; +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h @@ -0,0 +1,31 @@ +//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a base class for describing a simulated hardware +/// unit. These units are used to construct a simulated backend. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H +#define LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H + +namespace mca { + +class HardwareUnit { + HardwareUnit(const HardwareUnit &H) = delete; + HardwareUnit &operator=(const HardwareUnit &H) = delete; + +public: + HardwareUnit() = default; + virtual ~HardwareUnit(); +}; + +} // namespace mca +#endif // LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h @@ -0,0 +1,161 @@ +//===------------------------- LSUnit.h --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load/Store unit class that models load/store queues and that implements +/// a simple weak memory consistency model. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H +#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H + +#include "HardwareUnits/HardwareUnit.h" +#include + +namespace mca { + +class InstRef; +struct InstrDesc; + +/// A Load/Store Unit implementing a load and store queues. +/// +/// This class implements a load queue and a store queue to emulate the +/// out-of-order execution of memory operations. +/// Each load (or store) consumes an entry in the load (or store) queue. +/// +/// Rules are: +/// 1) A younger load is allowed to pass an older load only if there are no +/// stores nor barriers in between the two loads. +/// 2) An younger store is not allowed to pass an older store. 
+/// 3) A younger store is not allowed to pass an older load. +/// 4) A younger load is allowed to pass an older store only if the load does +/// not alias with the store. +/// +/// This class optimistically assumes that loads don't alias store operations. +/// Under this assumption, younger loads are always allowed to pass older +/// stores (this would only affects rule 4). +/// Essentially, this LSUnit doesn't attempt to run any sort alias analysis to +/// predict when loads and stores don't alias with eachother. +/// +/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be +/// set to `false` by the constructor of LSUnit. +/// +/// In the case of write-combining memory, rule 2. could be relaxed to allow +/// reordering of non-aliasing store operations. At the moment, this is not +/// allowed. +/// To put it in another way, there is no option to specify a different memory +/// type for memory operations (example: write-through, write-combining, etc.). +/// Also, there is no way to weaken the memory model, and this unit currently +/// doesn't support write-combining behavior. +/// +/// No assumptions are made on the size of the store buffer. +/// As mentioned before, this class doesn't perform alias analysis. +/// Consequently, LSUnit doesn't know how to identify cases where +/// store-to-load forwarding may occur. +/// +/// LSUnit doesn't attempt to predict whether a load or store hits or misses +/// the L1 cache. To be more specific, LSUnit doesn't know anything about +/// the cache hierarchy and memory types. +/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the +/// scheduling model provides an "optimistic" load-to-use latency (which usually +/// matches the load-to-use latency for when there is a hit in the L1D). +/// +/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor +/// memory-barrier like instructions. 
+/// LSUnit conservatively assumes that an instruction which `mayLoad` and has +/// `unmodeled side effects` behave like a "soft" load-barrier. That means, it +/// serializes loads without forcing a flush of the load queue. +/// Similarly, instructions that both `mayStore` and have `unmodeled side +/// effects` are treated like store barriers. A full memory +/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side +/// effects. This is obviously inaccurate, but this is the best that we can do +/// at the moment. +/// +/// Each load/store barrier consumes one entry in the load/store queue. A +/// load/store barrier enforces ordering of loads/stores: +/// - A younger load cannot pass a load barrier. +/// - A younger store cannot pass a store barrier. +/// +/// A younger load has to wait for the memory load barrier to execute. +/// A load/store barrier is "executed" when it becomes the oldest entry in +/// the load/store queue(s). That also means, all the older loads/stores have +/// already been executed. +class LSUnit : public HardwareUnit { + // Load queue size. + // LQ_Size == 0 means that there are infinite slots in the load queue. + unsigned LQ_Size; + + // Store queue size. + // SQ_Size == 0 means that there are infinite slots in the store queue. + unsigned SQ_Size; + + // If true, loads will never alias with stores. This is the default. + bool NoAlias; + + std::set LoadQueue; + std::set StoreQueue; + + void assignLQSlot(unsigned Index); + void assignSQSlot(unsigned Index); + bool isReadyNoAlias(unsigned Index) const; + + // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is + // conservatively treated as a store barrier. It forces older store to be + // executed before newer stores are issued. + std::set StoreBarriers; + + // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is + // conservatively treated as a load barrier. It forces older loads to execute + // before newer loads are issued. 
+ std::set LoadBarriers; + + bool isSQEmpty() const { return StoreQueue.empty(); } + bool isLQEmpty() const { return LoadQueue.empty(); } + bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; } + bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; } + +public: + LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false) + : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {} + +#ifndef NDEBUG + void dump() const; +#endif + + enum Status { + LSU_AVAILABLE = 0, + LSU_LQUEUE_FULL, + LSU_SQUEUE_FULL + }; + + // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve + // IR. It also returns LSU_AVAILABLE if IR is not a memory operation. + Status isAvailable(const InstRef &IR) const; + + // Allocates load/store queue resources for IR. + // + // This method assumes that a previous call to `isAvailable(IR)` returned + // LSU_AVAILABLE, and that IR is a memory operation. + void dispatch(const InstRef &IR); + + // By default, rules are: + // 1. A store may not pass a previous store. + // 2. A load may not pass a previous store unless flag 'NoAlias' is set. + // 3. A load may pass a previous load. + // 4. A store may not pass a previous load (regardless of flag 'NoAlias'). + // 5. A load has to wait until an older load barrier is fully executed. + // 6. A store has to wait until an older store barrier is fully executed. 
+ virtual bool isReady(const InstRef &IR) const; + void onInstructionExecuted(const InstRef &IR); +}; + +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h @@ -0,0 +1,171 @@ +//===--------------------- RegisterFile.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H +#define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H + +#include "HardwareUnits/HardwareUnit.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Error.h" + +namespace mca { + +class ReadState; +class WriteState; +class WriteRef; + +/// Manages hardware register files, and tracks register definitions for +/// register renaming purposes. +class RegisterFile : public HardwareUnit { + const llvm::MCRegisterInfo &MRI; + + // Each register file is associated with an instance of + // RegisterMappingTracker. + // A RegisterMappingTracker keeps track of the number of physical registers + // which have been dynamically allocated by the simulator. + struct RegisterMappingTracker { + // The total number of physical registers that are available in this + // register file for register renaming purpouses. 
A value of zero for this + // field means: this register file has an unbounded number of physical + // registers. + const unsigned NumPhysRegs; + // Number of physical registers that are currently in use. + unsigned NumUsedPhysRegs; + + RegisterMappingTracker(unsigned NumPhysRegisters) + : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0) {} + }; + + // A vector of register file descriptors. This set always contains at least + // one entry. Entry at index #0 is reserved. That entry describes a register + // file with an unbounded number of physical registers that "sees" all the + // hardware registers declared by the target (i.e. all the register + // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is + // the target name). + // + // Users can limit the number of physical registers that are available in + // regsiter file #0 specifying command line flag `-register-file-size=`. + llvm::SmallVector RegisterFiles; + + // This type is used to propagate information about the owner of a register, + // and the cost of allocating it in the PRF. Register cost is defined as the + // number of physical registers consumed by the PRF to allocate a user + // register. + // + // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical + // registers. So, the cost of allocating a YMM register in BtVer2 is 2. + using IndexPlusCostPairTy = std::pair; + + // Struct RegisterRenamingInfo maps registers to register files. + // There is a RegisterRenamingInfo object for every register defined by + // the target. RegisteRenamingInfo objects are stored into vector + // RegisterMappings, and register IDs can be used to reference them. + struct RegisterRenamingInfo { + IndexPlusCostPairTy IndexPlusCost; + llvm::MCPhysReg RenameAs; + }; + + // RegisterMapping objects are mainly used to track physical register + // definitions. There is a RegisterMapping for every register defined by the + // Target. 
For each register, a RegisterMapping pair contains a descriptor of + // the last register write (in the form of a WriteRef object), as well as a + // RegisterRenamingInfo to quickly identify owning register files. + // + // This implementation does not allow overlapping register files. The only + // register file that is allowed to overlap with other register files is + // register file #0. If we exclude register #0, every register is "owned" by + // at most one register file. + using RegisterMapping = std::pair; + + // This map contains one entry for each register defined by the target. + std::vector RegisterMappings; + + // This method creates a new register file descriptor. + // The new register file owns all of the registers declared by register + // classes in the 'RegisterClasses' set. + // + // Processor models allow the definition of RegisterFile(s) via tablegen. For + // example, this is a tablegen definition for a x86 register file for + // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1 + // physical register). + // + // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]> + // + // Here FPRegisterFile contains all the registers defined by register class + // VR128RegClass and VR256RegClass. FPRegisterFile implements 60 + // registers which can be used for register renaming purpose. + void + addRegisterFile(llvm::ArrayRef RegisterClasses, + unsigned NumPhysRegs); + + // Consumes physical registers in each register file specified by the + // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`. + void allocatePhysRegs(const RegisterRenamingInfo &Entry, + llvm::MutableArrayRef UsedPhysRegs); + + // Releases previously allocated physical registers from the register file(s). + // This method is called from `invalidateRegisterMapping()`. 
+ void freePhysRegs(const RegisterRenamingInfo &Entry, + llvm::MutableArrayRef FreedPhysRegs); + + // Create an instance of RegisterMappingTracker for every register file + // specified by the processor model. + // If no register file is specified, then this method creates a default + // register file with an unbounded number of physical registers. + void initialize(const llvm::MCSchedModel &SM, unsigned NumRegs); + +public: + RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri, + unsigned NumRegs = 0); + + // This method updates the register mappings inserting a new register + // definition. This method is also responsible for updating the number of + // allocated physical registers in each register file modified by the write. + // No physical register is allocated when flag ShouldAllocatePhysRegs is false. + void addRegisterWrite(WriteRef Write, + llvm::MutableArrayRef UsedPhysRegs, + bool ShouldAllocatePhysRegs = true); + + // Removes write \param WS from the register mappings. + // Physical registers may be released to reflect this update. + void removeRegisterWrite(const WriteState &WS, + llvm::MutableArrayRef FreedPhysRegs, + bool ShouldFreePhysRegs = true); + + // Checks if there are enough physical registers in the register files. + // Returns a "response mask" where each bit represents the response from a + // different register file. A mask of all zeroes means that all register + // files are available. Otherwise, the mask can be used to identify which + // register file was busy. This semantic allows us to classify dispatch + // stalls caused by the lack of register file resources. + // + // Current implementation can simulate up to 32 register files (including the + // special register file at index #0). 
+ unsigned isAvailable(llvm::ArrayRef Regs) const; + void collectWrites(llvm::SmallVectorImpl &Writes, + unsigned RegID) const; + unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h @@ -0,0 +1,360 @@ +//===--------------------- ResourceManager.h --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H +#define LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H + +#include "Instruction.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" + +namespace mca { + +/// Used to notify the internal state of a processor resource. +/// +/// A processor resource is available if it is not reserved, and there are +/// available slots in the buffer. A processor resource is unavailable if it +/// is either reserved, or the associated buffer is full. A processor resource +/// with a buffer size of -1 is always available if it is not reserved. 
+/// +/// Values of type ResourceStateEvent are returned by method +/// ResourceState::isBufferAvailable(), which is used to query the internal +/// state of a resource. +/// +/// The naming convention for resource state events is: +/// * Event names start with prefix RS_ +/// * Prefix RS_ is followed by a string describing the actual resource state. +enum ResourceStateEvent { + RS_BUFFER_AVAILABLE, + RS_BUFFER_UNAVAILABLE, + RS_RESERVED +}; + +/// Resource allocation strategy used by hardware scheduler resources. +class ResourceStrategy { + ResourceStrategy(const ResourceStrategy &) = delete; + ResourceStrategy &operator=(const ResourceStrategy &) = delete; + +public: + ResourceStrategy() {} + virtual ~ResourceStrategy(); + + /// Selects a processor resource unit from a ReadyMask. + virtual uint64_t select(uint64_t ReadyMask) = 0; + + /// Called by the ResourceManager when a processor resource group, or a + /// processor resource with multiple units has become unavailable. + /// + /// The default strategy uses this information to bias its selection logic. + virtual void used(uint64_t ResourceMask) {} +}; + +/// Default resource allocation strategy used by processor resource groups and +/// processor resources with multiple units. +class DefaultResourceStrategy final : public ResourceStrategy { + /// A Mask of resource unit identifiers. + /// + /// There is one bit set for every available resource unit. + /// It defaults to the value of field ResourceSizeMask in ResourceState. + const unsigned ResourceUnitMask; + + /// A simple round-robin selector for processor resource units. + /// Each bit of this mask identifies a sub resource within a group. 
+ /// +/// As an example, let's assume that this is a default policy for a +/// processor resource group composed by the following three units: +/// ResourceA -- 0b001 +/// ResourceB -- 0b010 +/// ResourceC -- 0b100 +/// +/// Field NextInSequenceMask is used to select the next unit from the set of +/// resource units. It defaults to the value of field `ResourceUnitMasks` (in +/// this example, it defaults to mask '0b111'). +/// +/// The round-robin selector would firstly select 'ResourceC', then +/// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the +/// corresponding bit in NextInSequenceMask is cleared. For example, if +/// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes +/// 0b011. +/// +/// When NextInSequenceMask becomes zero, it is automatically reset to the +/// default value (i.e. ResourceUnitMask). + uint64_t NextInSequenceMask; + + /// This field is used to track resource units that are used (i.e. selected) +/// by other groups other than the one associated with this strategy object. +/// +/// In LLVM processor resource groups are allowed to partially (or fully) +/// overlap. That means, a same unit may be visible to multiple groups. +/// This field keeps track of uses that have originated from outside of +/// this group. The idea is to bias the selection strategy, so that resources +/// that haven't been used by other groups get prioritized. +/// +/// The end goal is to (try to) keep the resource distribution as much uniform +/// as possible. By construction, this mask only tracks one-level of resource +/// usage. Therefore, this strategy is expected to be less accurate when same +/// units are used multiple times by other groups within a single round of +/// select. +/// +/// Note: an LRU selector would have a better accuracy at the cost of being +/// slightly more expensive (mostly in terms of runtime cost). 
Methods + /// 'select' and 'used', are always in the hot execution path of llvm-mca. + /// Therefore, a slow implementation of 'select' would have a negative impact + /// on the overall performance of the tool. + uint64_t RemovedFromNextInSequence; + + void skipMask(uint64_t Mask); + +public: + DefaultResourceStrategy(uint64_t UnitMask) + : ResourceStrategy(), ResourceUnitMask(UnitMask), + NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {} + virtual ~DefaultResourceStrategy() = default; + + uint64_t select(uint64_t ReadyMask) override; + void used(uint64_t Mask) override; +}; + +/// A processor resource descriptor. +/// +/// There is an instance of this class for every processor resource defined by +/// the machine scheduling model. +/// Objects of class ResourceState dynamically track the usage of processor +/// resource units. +class ResourceState { + /// An index to the MCProcResourceDesc entry in the processor model. + const unsigned ProcResourceDescIndex; + /// A resource mask. This is generated by the tool with the help of + /// function `mca::createProcResourceMasks' (see Support.h). + const uint64_t ResourceMask; + + /// A ProcResource can have multiple units. + /// + /// For processor resource groups, + /// this field default to the value of field `ResourceMask`; the number of + /// bits set is equal to the cardinality of the group. For normal (i.e. + /// non-group) resources, the number of bits set in this mask is equivalent + /// to the number of units declared by the processor model (see field + /// 'NumUnits' in 'ProcResourceUnits'). + uint64_t ResourceSizeMask; + + /// A mask of ready units. + uint64_t ReadyMask; + + /// Buffered resources will have this field set to a positive number different + /// than zero. A buffered resource behaves like a reservation station + /// implementing its own buffer for out-of-order execution. + /// + /// A BufferSize of 1 is used by scheduler resources that force in-order + /// execution. 
+ /// +/// A BufferSize of 0 is used to model in-order issue/dispatch resources. +/// Since in-order issue/dispatch resources don't implement buffers, dispatch +/// events coincide with issue events. +/// Also, no other instruction can be dispatched/issued while this resource is +/// in use. Only when all the "resource cycles" are consumed (after the issue +/// event), a new instruction can be dispatched. + const int BufferSize; + + /// Available slots in the buffer (zero, if this is not a buffered resource). + unsigned AvailableSlots; + + /// This field is set if this resource is currently reserved. + /// + /// Resources can be reserved for a number of cycles. + /// Instructions can still be dispatched to reserved resources. However, + /// instructions dispatched to a reserved resource cannot be issued to the + /// underlying units (i.e. pipelines) until the resource is released. + bool Unavailable; + + /// Checks for the availability of unit 'SubResMask' in the group. + bool isSubResourceReady(uint64_t SubResMask) const { + return ReadyMask & SubResMask; + } + +public: + ResourceState(const llvm::MCProcResourceDesc &Desc, unsigned Index, + uint64_t Mask); + + unsigned getProcResourceID() const { return ProcResourceDescIndex; } + uint64_t getResourceMask() const { return ResourceMask; } + uint64_t getReadyMask() const { return ReadyMask; } + int getBufferSize() const { return BufferSize; } + + bool isBuffered() const { return BufferSize > 0; } + bool isInOrder() const { return BufferSize == 1; } + + /// Returns true if this is an in-order dispatch/issue resource. + bool isADispatchHazard() const { return BufferSize == 0; } + bool isReserved() const { return Unavailable; } + + void setReserved() { Unavailable = true; } + void clearReserved() { Unavailable = false; } + + /// Returns true if this resource is not reserved, and if there are at least + /// `NumUnits` available units. 
+ bool isReady(unsigned NumUnits = 1) const; + + bool isAResourceGroup() const { + return llvm::countPopulation(ResourceMask) > 1; + } + + bool containsResource(uint64_t ID) const { return ResourceMask & ID; } + + void markSubResourceAsUsed(uint64_t ID) { + assert(isSubResourceReady(ID)); + ReadyMask ^= ID; + } + + void releaseSubResource(uint64_t ID) { + assert(!isSubResourceReady(ID)); + ReadyMask ^= ID; + } + + unsigned getNumUnits() const { + return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask); + } + + /// Checks if there is an available slot in the resource buffer. + /// + /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if + /// there is a slot available. + /// + /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it + /// is reserved. + /// + /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. + ResourceStateEvent isBufferAvailable() const; + + /// Reserve a slot in the buffer. + void reserveBuffer() { + if (AvailableSlots) + AvailableSlots--; + } + + /// Release a slot in the buffer. + void releaseBuffer() { + if (BufferSize > 0) + AvailableSlots++; + assert(AvailableSlots <= static_cast(BufferSize)); + } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// A resource unit identifier. +/// +/// This is used to identify a specific processor resource unit using a pair +/// of indices where the 'first' index is a processor resource mask, and the +/// 'second' index is an index for a "sub-resource" (i.e. unit). +typedef std::pair ResourceRef; + +// First: a MCProcResourceDesc index identifying a buffered resource. +// Second: max number of buffer entries used in this resource. +typedef std::pair BufferUsageEntry; + +/// A resource manager for processor resource units and groups. +/// +/// This class owns all the ResourceState objects, and it is responsible for +/// acting on requests from a Scheduler by updating the internal state of +/// ResourceState objects. 
+/// This class doesn't know about instruction itineraries and functional units. +/// In future, it can be extended to support itineraries too through the same +/// public interface. +class ResourceManager { + // The resource manager owns all the ResourceState. + std::vector> Resources; + std::vector> Strategies; + + // Keeps track of which resources are busy, and how many cycles are left + // before those become usable again. + llvm::SmallDenseMap BusyResources; + + // A table to map processor resource IDs to processor resource masks. + llvm::SmallVector ProcResID2Mask; + + // Returns the actual resource unit that will be used. + ResourceRef selectPipe(uint64_t ResourceID); + + void use(const ResourceRef &RR); + void release(const ResourceRef &RR); + + unsigned getNumUnits(uint64_t ResourceID) const; + + // Overrides the selection strategy for the processor resource with the given + // mask. + void setCustomStrategyImpl(std::unique_ptr S, + uint64_t ResourceMask); + +public: + ResourceManager(const llvm::MCSchedModel &SM); + virtual ~ResourceManager() = default; + + // Overrides the selection strategy for the resource at index ResourceID in + // the MCProcResourceDesc table. + void setCustomStrategy(std::unique_ptr S, + unsigned ResourceID) { + assert(ResourceID < ProcResID2Mask.size() && + "Invalid resource index in input!"); + return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); + } + + // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if + // there are enough available slots in the buffers. + ResourceStateEvent canBeDispatched(llvm::ArrayRef Buffers) const; + + // Return the processor resource identifier associated to this Mask. + unsigned resolveResourceMask(uint64_t Mask) const; + + // Consume a slot in every buffered resource from array 'Buffers'. Resource + // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. 
+ void reserveBuffers(llvm::ArrayRef Buffers); + + // Release buffer entries previously allocated by method reserveBuffers. + void releaseBuffers(llvm::ArrayRef Buffers); + + // Reserve a processor resource. A reserved resource is not available for + // instruction issue until it is released. + void reserveResource(uint64_t ResourceID); + + // Release a previously reserved processor resource. + void releaseResource(uint64_t ResourceID); + + // Returns true if all resources are in-order, and there is at least one + // resource which is a dispatch hazard (BufferSize = 0). + bool mustIssueImmediately(const InstrDesc &Desc) const; + + bool canBeIssued(const InstrDesc &Desc) const; + + void issueInstruction( + const InstrDesc &Desc, + llvm::SmallVectorImpl> &Pipes); + + void cycleEvent(llvm::SmallVectorImpl &ResourcesFreed); + +#ifndef NDEBUG + void dump() const { + for (const std::unique_ptr &Resource : Resources) + Resource->dump(); + } +#endif +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h @@ -0,0 +1,97 @@ +//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H +#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H + +#include "HardwareUnits/HardwareUnit.h" +#include "Instruction.h" +#include "llvm/MC/MCSchedule.h" +#include + +namespace mca { + +/// This class tracks which instructions are in-flight (i.e., dispatched but not +/// retired) in the OoO backend. +// +/// This class checks on every cycle if/which instructions can be retired. +/// Instructions are retired in program order. +/// In the event of an instruction being retired, the pipeline that owns +/// this RetireControlUnit (RCU) gets notified. +/// +/// On instruction retired, register updates are all architecturally +/// committed, and any physicall registers previously allocated for the +/// retired instruction are freed. +struct RetireControlUnit : public HardwareUnit { + // A RUToken is created by the RCU for every instruction dispatched to the + // schedulers. These "tokens" are managed by the RCU in its token Queue. + // + // On every cycle ('cycleEvent'), the RCU iterates through the token queue + // looking for any token with its 'Executed' flag set. If a token has that + // flag set, then the instruction has reached the write-back stage and will + // be retired by the RCU. + // + // 'NumSlots' represents the number of entries consumed by the instruction in + // the reorder buffer. Those entries will become available again once the + // instruction is retired. + // + // Note that the size of the reorder buffer is defined by the scheduling + // model via field 'NumMicroOpBufferSize'. + struct RUToken { + InstRef IR; + unsigned NumSlots; // Slots reserved to this instruction. + bool Executed; // True if the instruction is past the WB stage. + }; + +private: + unsigned NextAvailableSlotIdx; + unsigned CurrentInstructionSlotIdx; + unsigned AvailableSlots; + unsigned MaxRetirePerCycle; // 0 means no limit. 
+ std::vector Queue; + +public: + RetireControlUnit(const llvm::MCSchedModel &SM); + + bool isEmpty() const { return AvailableSlots == Queue.size(); } + bool isAvailable(unsigned Quantity = 1) const { + // Some instructions may declare a number of uOps which exceeds the size + // of the reorder buffer. To avoid problems, cap the amount of slots to + // the size of the reorder buffer. + Quantity = std::min(Quantity, static_cast(Queue.size())); + return AvailableSlots >= Quantity; + } + + unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } + + // Reserves a number of slots, and returns a new token. + unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); + + // Return the current token from the RCU's circular token queue. + const RUToken &peekCurrentToken() const; + + // Advance the pointer to the next token in the circular token queue. + void consumeCurrentToken(); + + // Update the RCU token to represent the executed state. + void onInstructionExecuted(unsigned TokenID); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h @@ -0,0 +1,212 @@ +//===--------------------- Scheduler.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A scheduler for Processor Resource Units and Processor Resource Groups. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H +#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H + +#include "HardwareUnits/HardwareUnit.h" +#include "HardwareUnits/LSUnit.h" +#include "ResourceManager.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" + +namespace mca { + +class SchedulerStrategy { +public: + SchedulerStrategy() = default; + virtual ~SchedulerStrategy(); + + /// Returns true if Lhs should take priority over Rhs. + /// + /// This method is used by class Scheduler to select the "best" ready + /// instruction to issue to the underlying pipelines. + virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0; +}; + +/// Default instruction selection strategy used by class Scheduler. +class DefaultSchedulerStrategy : public SchedulerStrategy { + /// This method ranks instructions based on their age, and the number of known + /// users. The lower the rank value, the better. + int computeRank(const InstRef &Lhs) const { + return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers(); + } + +public: + DefaultSchedulerStrategy() = default; + virtual ~DefaultSchedulerStrategy(); + + bool compare(const InstRef &Lhs, const InstRef &Rhs) const override { + int LhsRank = computeRank(Lhs); + int RhsRank = computeRank(Rhs); + + /// Prioritize older instructions over younger instructions to minimize the + /// pressure on the reorder buffer. + if (LhsRank == RhsRank) + return Lhs.getSourceIndex() < Rhs.getSourceIndex(); + return LhsRank < RhsRank; + } +}; + +/// Class Scheduler is responsible for issuing instructions to pipeline +/// resources. +/// +/// Internally, it delegates to a ResourceManager the management of processor +/// resources. This class is also responsible for tracking the progress of +/// instructions from the dispatch stage, until the write-back stage. 
+/// +/// An instruction dispatched to the Scheduler is initially placed into either +/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input +/// operands. +/// +/// An instruction is moved from the WaitSet to the ReadySet when register +/// operands become available, and all memory dependencies are met. +/// Instructions that are moved from the WaitSet to the ReadySet transition +/// in state from 'IS_AVAILABLE' to 'IS_READY'. +/// +/// On every cycle, the Scheduler checks if it can promote instructions from the +/// WaitSet to the ReadySet. +/// +/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued +/// to a (one or more) pipeline(s). This event also causes an instruction state +/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction +/// leaves the IssuedSet when it reaches the write-back stage. +class Scheduler : public HardwareUnit { + LSUnit *LSU; + + // Instruction selection strategy for this Scheduler. + std::unique_ptr Strategy; + + // Hardware resources that are managed by this scheduler. + std::unique_ptr Resources; + + std::vector WaitSet; + std::vector ReadySet; + std::vector IssuedSet; + + /// Verify the given selection strategy and set the Strategy member + /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is + /// used. + void initializeStrategy(std::unique_ptr S); + + /// Issue an instruction without updating the ready queue. + void issueInstructionImpl( + InstRef &IR, + llvm::SmallVectorImpl> &Pipes); + + // Identify instructions that have finished executing, and remove them from + // the IssuedSet. References to executed instructions are added to input + // vector 'Executed'. + void updateIssuedSet(llvm::SmallVectorImpl &Executed); + + // Try to promote instructions from WaitSet to ReadySet. + // Add promoted instructions to the 'Ready' vector in input. 
+ void promoteToReadySet(llvm::SmallVectorImpl &Ready); + +public: + Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu) + : LSU(Lsu), Resources(llvm::make_unique(Model)) { + initializeStrategy(nullptr); + } + Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu, + std::unique_ptr SelectStrategy) + : LSU(Lsu), Resources(llvm::make_unique(Model)) { + initializeStrategy(std::move(SelectStrategy)); + } + Scheduler(std::unique_ptr RM, LSUnit *Lsu, + std::unique_ptr SelectStrategy) + : LSU(Lsu), Resources(std::move(RM)) { + initializeStrategy(std::move(SelectStrategy)); + } + + // Stalls generated by the scheduler. + enum Status { + SC_AVAILABLE, + SC_LOAD_QUEUE_FULL, + SC_STORE_QUEUE_FULL, + SC_BUFFERS_FULL, + SC_DISPATCH_GROUP_STALL, + }; + + /// Check if the instruction in 'IR' can be dispatched and returns an answer + /// in the form of a Status value. + /// + /// The DispatchStage is responsible for querying the Scheduler before + /// dispatching new instructions. This routine is used for performing such + /// a query. If the instruction 'IR' can be dispatched, then true is + /// returned, otherwise false is returned with Event set to the stall type. + /// Internally, it also checks if the load/store unit is available. + Status isAvailable(const InstRef &IR) const; + + /// Reserves buffer and LSUnit queue resources that are necessary to issue + /// this instruction. + /// + /// Returns true if instruction IR is ready to be issued to the underlying + /// pipelines. Note that this operation cannot fail; it assumes that a + /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`. + void dispatch(const InstRef &IR); + + /// Returns true if IR is ready to be executed by the underlying pipelines. + /// This method assumes that IR has been previously dispatched. 
+ bool isReady(const InstRef &IR) const; + + /// Issue an instruction and populates a vector of used pipeline resources, + /// and a vector of instructions that transitioned to the ready state as a + /// result of this event. + void + issueInstruction(InstRef &IR, + llvm::SmallVectorImpl> &Used, + llvm::SmallVectorImpl &Ready); + + /// Returns true if IR has to be issued immediately, or if IR is a zero + /// latency instruction. + bool mustIssueImmediately(const InstRef &IR) const; + + /// This routine notifies the Scheduler that a new cycle just started. + /// + /// It notifies the underlying ResourceManager that a new cycle just started. + /// Vector `Freed` is populated with resourceRef related to resources that + /// have changed in state, and that are now available to new instructions. + /// Instructions executed are added to vector Executed, while vector Ready is + /// populated with instructions that have become ready in this new cycle. + void cycleEvent(llvm::SmallVectorImpl &Freed, + llvm::SmallVectorImpl &Ready, + llvm::SmallVectorImpl &Executed); + + /// Convert a resource mask into a valid llvm processor resource identifier. + unsigned getResourceID(uint64_t Mask) const { + return Resources->resolveResourceMask(Mask); + } + + /// Select the next instruction to issue from the ReadySet. Returns an invalid + /// instruction reference if there are no ready instructions, or if processor + /// resources are not available. + InstRef select(); + +#ifndef NDEBUG + // Update the ready queues. + void dump() const; + + // This routine performs a sanity check. This routine should only be called + // when we know that 'IR' is not in the scheduler's instruction queues. 
+ void sanityCheck(const InstRef &IR) const { + assert(llvm::find(WaitSet, IR) == WaitSet.end()); + assert(llvm::find(ReadySet, IR) == ReadySet.end()); + assert(llvm::find(IssuedSet, IR) == IssuedSet.end()); + } +#endif // !NDEBUG +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H Index: llvm/trunk/tools/llvm-mca/include/InstrBuilder.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/InstrBuilder.h +++ llvm/trunk/tools/llvm-mca/include/InstrBuilder.h @@ -0,0 +1,90 @@ +//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A builder class for instructions that are statically analyzed by llvm-mca. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H +#define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H + +#include "Instruction.h" +#include "Support.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Error.h" + +namespace mca { + +class DispatchUnit; + +/// A builder class that knows how to construct Instruction objects. +/// +/// Every llvm-mca Instruction is described by an object of class InstrDesc. +/// An InstrDesc describes which registers are read/written by the instruction, +/// as well as the instruction latency and hardware resources consumed. +/// +/// This class is used by the tool to construct Instructions and instruction +/// descriptors (i.e. InstrDesc objects). 
+/// Information from the machine scheduling model is used to identify processor +/// resources that are consumed by an instruction. +class InstrBuilder { + const llvm::MCSubtargetInfo &STI; + const llvm::MCInstrInfo &MCII; + const llvm::MCRegisterInfo &MRI; + const llvm::MCInstrAnalysis &MCIA; + llvm::MCInstPrinter &MCIP; + llvm::SmallVector ProcResourceMasks; + + llvm::DenseMap> Descriptors; + llvm::DenseMap> + VariantDescriptors; + + llvm::Expected + createInstrDescImpl(const llvm::MCInst &MCI); + llvm::Expected + getOrCreateInstrDesc(const llvm::MCInst &MCI); + + InstrBuilder(const InstrBuilder &) = delete; + InstrBuilder &operator=(const InstrBuilder &) = delete; + + llvm::Error populateWrites(InstrDesc &ID, const llvm::MCInst &MCI, + unsigned SchedClassID); + llvm::Error populateReads(InstrDesc &ID, const llvm::MCInst &MCI, + unsigned SchedClassID); + +public: + InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, + const llvm::MCRegisterInfo &mri, + const llvm::MCInstrAnalysis &mcia, llvm::MCInstPrinter &mcip) + : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), MCIP(mcip), + ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) { + computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); + } + + // Returns an array of processor resource masks. + // Masks are computed by function mca::computeProcResourceMasks. see + // Support.h for a description of how masks are computed and how masks can be + // used to solve set membership problems. 
+ llvm::ArrayRef getProcResourceMasks() const { + return ProcResourceMasks; + } + + void clear() { VariantDescriptors.shrink_and_clear(); } + + llvm::Expected> + createInstruction(const llvm::MCInst &MCI); +}; +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/Instruction.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Instruction.h +++ llvm/trunk/tools/llvm-mca/include/Instruction.h @@ -0,0 +1,449 @@ +//===--------------------- Instruction.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines abstractions used by the Pipeline to model register reads, +/// register writes and instructions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H +#define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H + +#include "llvm/Support/MathExtras.h" + +#ifndef NDEBUG +#include "llvm/Support/raw_ostream.h" +#endif + +#include +#include +#include + +namespace mca { + +constexpr int UNKNOWN_CYCLES = -512; + +/// A register write descriptor. +struct WriteDescriptor { + // Operand index. The index is negative for implicit writes only. + // For implicit writes, the actual operand index is computed performing + // a bitwise not of the OpIndex. + int OpIndex; + // Write latency. Number of cycles before write-back stage. + unsigned Latency; + // This field is set to a value different than zero only if this + // is an implicit definition. + unsigned RegisterID; + // Instruction itineraries would set this field to the SchedClass ID. + // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry + // element associated to this write. 
+ // When computing read latencies, this value is matched against the + // "ReadAdvance" information. The hardware backend may implement + // dedicated forwarding paths to quickly propagate write results to dependent + // instructions waiting in the reservation station (effectively bypassing the + // write-back stage). + unsigned SClassOrWriteResourceID; + // True only if this is a write obtained from an optional definition. + // Optional definitions are allowed to reference regID zero (i.e. "no + // register"). + bool IsOptionalDef; + + bool isImplicitWrite() const { return OpIndex < 0; }; +}; + +/// A register read descriptor. +struct ReadDescriptor { + // A MCOperand index. This is used by the Dispatch logic to identify register + // reads. Implicit reads have negative indices. The actual operand index of an + // implicit read is the bitwise not of field OpIndex. + int OpIndex; + // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit + // uses always come first in the sequence of uses. + unsigned UseIndex; + // This field is only set if this is an implicit read. + unsigned RegisterID; + // Scheduling Class Index. It is used to query the scheduling model for the + // MCSchedClassDesc object. + unsigned SchedClassID; + + bool isImplicitRead() const { return OpIndex < 0; }; +}; + +class ReadState; + +/// Tracks uses of a register definition (e.g. register write). +/// +/// Each implicit/explicit register write is associated with an instance of +/// this class. A WriteState object tracks the dependent users of a +/// register write. It also tracks how many cycles are left before the write +/// back stage. +class WriteState { + const WriteDescriptor &WD; + // On instruction issue, this field is set equal to the write latency. + // Before instruction issue, this field defaults to -512, a special + // value that represents an "unknown" number of cycles. + int CyclesLeft; + + // Actual register defined by this write. 
This field is only used + // to speedup queries on the register file. + // For implicit writes, this field always matches the value of + // field RegisterID from WD. + unsigned RegisterID; + + // True if this write implicitly clears the upper portion of RegisterID's + // super-registers. + bool ClearsSuperRegs; + + // This field is set if this is a partial register write, and it has a false + // dependency on any previous write of the same register (or a portion of it). + // DependentWrite must be able to complete before this write completes, so + // that we don't break the WAW, and the two writes can be merged together. + const WriteState *DependentWrite; + + // Number of writes that are in a WAW dependency with this write. + unsigned NumWriteUsers; + + // A list of dependent reads. Users is a set of dependent + // reads. A dependent read is added to the set only if CyclesLeft + // is "unknown". As soon as CyclesLeft is 'known', each user in the set + // gets notified with the actual CyclesLeft. + + // The 'second' element of a pair is a "ReadAdvance" number of cycles. 
+ std::set> Users; + +public: + WriteState(const WriteDescriptor &Desc, unsigned RegID, + bool clearsSuperRegs = false) + : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), + ClearsSuperRegs(clearsSuperRegs), DependentWrite(nullptr), + NumWriteUsers(0U) {} + WriteState(const WriteState &Other) = delete; + WriteState &operator=(const WriteState &Other) = delete; + + int getCyclesLeft() const { return CyclesLeft; } + unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; } + unsigned getRegisterID() const { return RegisterID; } + unsigned getLatency() const { return WD.Latency; } + + void addUser(ReadState *Use, int ReadAdvance); + + unsigned getNumUsers() const { return Users.size() + NumWriteUsers; } + bool clearsSuperRegisters() const { return ClearsSuperRegs; } + + const WriteState *getDependentWrite() const { return DependentWrite; } + void setDependentWrite(WriteState *Other) { + DependentWrite = Other; + ++Other->NumWriteUsers; + } + + // On every cycle, update CyclesLeft and notify dependent users. + void cycleEvent(); + void onInstructionIssued(); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// Tracks register operand latency in cycles. +/// +/// A read may be dependent on more than one write. This occurs when some +/// writes only partially update the register associated to this read. +class ReadState { + const ReadDescriptor &RD; + // Physical register identified associated to this read. + unsigned RegisterID; + // Number of writes that contribute to the definition of RegisterID. + // In the absence of partial register updates, the number of DependentWrites + // cannot be more than one. + unsigned DependentWrites; + // Number of cycles left before RegisterID can be read. This value depends on + // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES. + // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of + // every dependent write is known. 
+ int CyclesLeft; + // This field is updated on every writeStartEvent(). When the number of + // dependent writes (i.e. field DependentWrite) is zero, this value is + // propagated to field CyclesLeft. + unsigned TotalCycles; + // This field is set to true only if there are no dependent writes, and + // there are no `CyclesLeft' to wait. + bool IsReady; + +public: + ReadState(const ReadDescriptor &Desc, unsigned RegID) + : RD(Desc), RegisterID(RegID), DependentWrites(0), + CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {} + ReadState(const ReadState &Other) = delete; + ReadState &operator=(const ReadState &Other) = delete; + + const ReadDescriptor &getDescriptor() const { return RD; } + unsigned getSchedClass() const { return RD.SchedClassID; } + unsigned getRegisterID() const { return RegisterID; } + + bool isReady() const { return IsReady; } + bool isImplicitRead() const { return RD.isImplicitRead(); } + + void cycleEvent(); + void writeStartEvent(unsigned Cycles); + void setDependentWrites(unsigned Writes) { + DependentWrites = Writes; + IsReady = !Writes; + } +}; + +/// A sequence of cycles. +/// +/// This class can be used as a building block to construct ranges of cycles. +class CycleSegment { + unsigned Begin; // Inclusive. + unsigned End; // Exclusive. + bool Reserved; // Resources associated to this segment must be reserved. 
+ +public: + CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false) + : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {} + + bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; } + bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; } + bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; } + bool overlaps(const CycleSegment &CS) const { + return !startsAfter(CS) && !endsBefore(CS); + } + bool isExecuting() const { return Begin == 0 && End != 0; } + bool isExecuted() const { return End == 0; } + bool operator<(const CycleSegment &Other) const { + return Begin < Other.Begin; + } + CycleSegment &operator--(void) { + if (Begin) + Begin--; + if (End) + End--; + return *this; + } + + bool isValid() const { return Begin <= End; } + unsigned size() const { return End - Begin; }; + void Subtract(unsigned Cycles) { + assert(End >= Cycles); + End -= Cycles; + } + + unsigned begin() const { return Begin; } + unsigned end() const { return End; } + void setEnd(unsigned NewEnd) { End = NewEnd; } + bool isReserved() const { return Reserved; } + void setReserved() { Reserved = true; } +}; + +/// Helper used by class InstrDesc to describe how hardware resources +/// are used. +/// +/// This class describes how many resource units of a specific resource kind +/// (and how many cycles) are "used" by an instruction. +struct ResourceUsage { + CycleSegment CS; + unsigned NumUnits; + ResourceUsage(CycleSegment Cycles, unsigned Units = 1) + : CS(Cycles), NumUnits(Units) {} + unsigned size() const { return CS.size(); } + bool isReserved() const { return CS.isReserved(); } + void setReserved() { CS.setReserved(); } +}; + +/// An instruction descriptor +struct InstrDesc { + std::vector Writes; // Implicit writes are at the end. + std::vector Reads; // Implicit reads are at the end. 
+ + // For every resource used by an instruction of this kind, this vector + // reports the number of "consumed cycles". + std::vector> Resources; + + // A list of buffered resources consumed by this instruction. + std::vector Buffers; + unsigned MaxLatency; + // Number of MicroOps for this instruction. + unsigned NumMicroOps; + + bool MayLoad; + bool MayStore; + bool HasSideEffects; + + // A zero latency instruction doesn't consume any scheduler resources. + bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } +}; + +/// An instruction propagated through the simulated instruction pipeline. +/// +/// This class is used to monitor changes to the internal state of instructions +/// that are sent to the various components of the simulated hardware pipeline. +class Instruction { + const InstrDesc &Desc; + + enum InstrStage { + IS_INVALID, // Instruction in an invalid state. + IS_AVAILABLE, // Instruction dispatched but operands are not ready. + IS_READY, // Instruction dispatched and operands ready. + IS_EXECUTING, // Instruction issued. + IS_EXECUTED, // Instruction executed. Values are written back. + IS_RETIRED // Instruction retired. + }; + + // The current instruction stage. + enum InstrStage Stage; + + // This value defaults to the instruction latency. This instruction is + // considered executed when field CyclesLeft goes to zero. + int CyclesLeft; + + // Retire Unit token ID for this instruction. + unsigned RCUTokenID; + + bool IsDepBreaking; + + using UniqueDef = std::unique_ptr; + using UniqueUse = std::unique_ptr; + using VecDefs = std::vector; + using VecUses = std::vector; + + // Output dependencies. + // One entry per each implicit and explicit register definition. + VecDefs Defs; + + // Input dependencies. + // One entry per each implicit and explicit register use. 
+ VecUses Uses; + +public: + Instruction(const InstrDesc &D) + : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0), + IsDepBreaking(false) {} + Instruction(const Instruction &Other) = delete; + Instruction &operator=(const Instruction &Other) = delete; + + VecDefs &getDefs() { return Defs; } + const VecDefs &getDefs() const { return Defs; } + VecUses &getUses() { return Uses; } + const VecUses &getUses() const { return Uses; } + const InstrDesc &getDesc() const { return Desc; } + unsigned getRCUTokenID() const { return RCUTokenID; } + int getCyclesLeft() const { return CyclesLeft; } + + bool hasDependentUsers() const { + return std::any_of(Defs.begin(), Defs.end(), [](const UniqueDef &Def) { + return Def->getNumUsers() > 0; + }); + } + + bool isDependencyBreaking() const { return IsDepBreaking; } + void setDependencyBreaking() { IsDepBreaking = true; } + + unsigned getNumUsers() const { + unsigned NumUsers = 0; + for (const UniqueDef &Def : Defs) + NumUsers += Def->getNumUsers(); + return NumUsers; + } + + // Transition to the dispatch stage, and assign a RCUToken to this + // instruction. The RCUToken is used to track the completion of every + // register write performed by this instruction. + void dispatch(unsigned RCUTokenID); + + // Instruction issued. Transition to the IS_EXECUTING state, and update + // all the definitions. + void execute(); + + // Force a transition from the IS_AVAILABLE state to the IS_READY state if + // input operands are all ready. State transitions normally occur at the + // beginning of a new cycle (see method cycleEvent()). However, the scheduler + // may decide to promote instructions from the wait queue to the ready queue + // as the result of another issue event. This method is called every time the + // instruction might have changed in state. 
+ void update(); + + bool isDispatched() const { return Stage == IS_AVAILABLE; } + bool isReady() const { return Stage == IS_READY; } + bool isExecuting() const { return Stage == IS_EXECUTING; } + bool isExecuted() const { return Stage == IS_EXECUTED; } + bool isRetired() const { return Stage == IS_RETIRED; } + + void retire() { + assert(isExecuted() && "Instruction is in an invalid state!"); + Stage = IS_RETIRED; + } + + void cycleEvent(); +}; + +/// An InstRef contains both a SourceMgr index and Instruction pair. The index +/// is used as a unique identifier for the instruction. MCA will make use of +/// this index as a key throughout MCA. +class InstRef : public std::pair { +public: + InstRef() : std::pair(0, nullptr) {} + InstRef(unsigned Index, Instruction *I) + : std::pair(Index, I) {} + + unsigned getSourceIndex() const { return first; } + Instruction *getInstruction() { return second; } + const Instruction *getInstruction() const { return second; } + + /// Returns true if this references a valid instruction. + bool isValid() const { return second != nullptr; } + + /// Invalidate this reference. + void invalidate() { second = nullptr; } + +#ifndef NDEBUG + void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); } +#endif +}; + +#ifndef NDEBUG +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InstRef &IR) { + IR.print(OS); + return OS; +} +#endif + +/// A reference to a register write. +/// +/// This class is mainly used by the register file to describe register +/// mappings. It correlates a register write to the source index of the +/// defining instruction. 
+class WriteRef { + std::pair Data; + static const unsigned INVALID_IID; + +public: + WriteRef() : Data(INVALID_IID, nullptr) {} + WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {} + + unsigned getSourceIndex() const { return Data.first; } + const WriteState *getWriteState() const { return Data.second; } + WriteState *getWriteState() { return Data.second; } + void invalidate() { Data = std::make_pair(INVALID_IID, nullptr); } + + bool isValid() const { + return Data.first != INVALID_IID && Data.second != nullptr; + } + bool operator==(const WriteRef &Other) const { return Data == Other.Data; } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/Pipeline.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Pipeline.h +++ llvm/trunk/tools/llvm-mca/include/Pipeline.h @@ -0,0 +1,76 @@ +//===--------------------- Pipeline.h ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINE_H +#define LLVM_TOOLS_LLVM_MCA_PIPELINE_H + +#include "HardwareUnits/Scheduler.h" +#include "Stages/Stage.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Error.h" + +namespace mca { + +class HWEventListener; +class HWInstructionEvent; +class HWStallEvent; + +/// A pipeline for a specific subtarget. +/// +/// It emulates an out-of-order execution of instructions. Instructions are +/// fetched from a MCInst sequence managed by an initial 'Fetch' stage. 
+/// Instructions are firstly fetched, then dispatched to the schedulers, and +/// then executed. +/// +/// This class tracks the lifetime of an instruction from the moment where +/// it gets dispatched to the schedulers, to the moment where it finishes +/// executing and register writes are architecturally committed. +/// In particular, it monitors changes in the state of every instruction +/// in flight. +/// +/// Instructions are executed in a loop of iterations. The number of iterations +/// is defined by the SourceMgr object, which is managed by the initial stage +/// of the instruction pipeline. +/// +/// The Pipeline entry point is method 'run()' which executes cycles in a loop +/// until there are new instructions to dispatch, and not every instruction +/// has been retired. +/// +/// Internally, the Pipeline collects statistical information in the form of +/// histograms. For example, it tracks how the dispatch group size changes +/// over time. +class Pipeline { + Pipeline(const Pipeline &P) = delete; + Pipeline &operator=(const Pipeline &P) = delete; + + /// An ordered list of stages that define this instruction pipeline. 
+ llvm::SmallVector, 8> Stages; + std::set Listeners; + unsigned Cycles; + + llvm::Error runCycle(); + bool hasWorkToProcess(); + void notifyCycleBegin(); + void notifyCycleEnd(); + +public: + Pipeline() : Cycles(0) {} + void appendStage(std::unique_ptr S); + llvm::Error run(); + void addEventListener(HWEventListener *Listener); +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_PIPELINE_H Index: llvm/trunk/tools/llvm-mca/include/SourceMgr.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/SourceMgr.h +++ llvm/trunk/tools/llvm-mca/include/SourceMgr.h @@ -0,0 +1,64 @@ +//===--------------------- SourceMgr.h --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements class SourceMgr. Class SourceMgr abstracts the input +/// code sequence (a sequence of MCInst), and assings unique identifiers to +/// every instruction in the sequence. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H +#define LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H + +#include "llvm/MC/MCInst.h" +#include + +namespace mca { + +typedef std::pair SourceRef; + +class SourceMgr { + using InstVec = std::vector>; + const InstVec &Sequence; + unsigned Current; + unsigned Iterations; + static const unsigned DefaultIterations = 100; + +public: + SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations) + : Sequence(MCInstSequence), Current(0), + Iterations(NumIterations ? 
NumIterations : DefaultIterations) {} + + unsigned getCurrentIteration() const { return Current / Sequence.size(); } + unsigned getNumIterations() const { return Iterations; } + unsigned size() const { return Sequence.size(); } + const InstVec &getSequence() const { return Sequence; } + + bool hasNext() const { return Current < (Iterations * size()); } + void updateNext() { Current++; } + + const SourceRef peekNext() const { + assert(hasNext() && "Already at end of sequence!"); + unsigned Index = getCurrentInstructionIndex(); + return SourceRef(Current, Sequence[Index].get()); + } + + unsigned getCurrentInstructionIndex() const { + return Current % Sequence.size(); + } + + const llvm::MCInst &getMCInstFromIndex(unsigned Index) const { + return *Sequence[Index % size()]; + } + + bool isEmpty() const { return size() == 0; } +}; +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h @@ -0,0 +1,95 @@ +//===----------------------- DispatchStage.h --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. +/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H + +#include "HWEventListener.h" +#include "HardwareUnits/RegisterFile.h" +#include "HardwareUnits/RetireControlUnit.h" +#include "Instruction.h" +#include "Stages/Stage.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace mca { + +// Implements the hardware dispatch logic. +// +// This class is responsible for the dispatch stage, in which instructions are +// dispatched in groups to the Scheduler. An instruction can be dispatched if +// the following conditions are met: +// 1) There are enough entries in the reorder buffer (see class +// RetireControlUnit) to write the opcodes associated with the instruction. +// 2) There are enough physical registers to rename output register operands. +// 3) There are enough entries available in the used buffered resource(s). +// +// The number of micro opcodes that can be dispatched in one cycle is limited by +// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when +// processor resources are not available. Dispatch stall events are counted +// during the entire execution of the code, and displayed by the performance +// report when flag '-dispatch-stats' is specified. +// +// If the number of micro opcodes exceedes DispatchWidth, then the instruction +// is dispatched in multiple cycles. 
+class DispatchStage final : public Stage { + unsigned DispatchWidth; + unsigned AvailableEntries; + unsigned CarryOver; + const llvm::MCSubtargetInfo &STI; + RetireControlUnit &RCU; + RegisterFile &PRF; + + bool checkRCU(const InstRef &IR) const; + bool checkPRF(const InstRef &IR) const; + bool canDispatch(const InstRef &IR) const; + llvm::Error dispatch(InstRef IR); + + void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); + + void notifyInstructionDispatched(const InstRef &IR, + llvm::ArrayRef UsedPhysRegs); + + void collectWrites(llvm::SmallVectorImpl &Vec, + unsigned RegID) const { + return PRF.collectWrites(Vec, RegID); + } + +public: + DispatchStage(const llvm::MCSubtargetInfo &Subtarget, + const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize, + unsigned MaxDispatchWidth, RetireControlUnit &R, + RegisterFile &F) + : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), + CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) {} + + bool isAvailable(const InstRef &IR) const override; + + // The dispatch logic internally doesn't buffer instructions. So there is + // never work to do at the beginning of every cycle. + bool hasWorkToComplete() const override { return false; } + llvm::Error cycleStart() override; + llvm::Error execute(InstRef &IR) override; + +#ifndef NDEBUG + void dump() const; +#endif +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h @@ -0,0 +1,78 @@ +//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of a default instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H + +#include "HardwareUnits/Scheduler.h" +#include "Instruction.h" +#include "Stages/Stage.h" +#include "llvm/ADT/ArrayRef.h" + +namespace mca { + +class ExecuteStage final : public Stage { + Scheduler &HWS; + + llvm::Error issueInstruction(InstRef &IR); + + // Called at the beginning of each cycle to issue already dispatched + // instructions to the underlying pipelines. + llvm::Error issueReadyInstructions(); + + ExecuteStage(const ExecuteStage &Other) = delete; + ExecuteStage &operator=(const ExecuteStage &Other) = delete; + +public: + ExecuteStage(Scheduler &S) : Stage(), HWS(S) {} + + // This stage works under the assumption that the Pipeline will eventually + // execute a retire stage. We don't need to check if pipelines and/or + // schedulers have instructions to process, because those instructions are + // also tracked by the retire control unit. That means, + // RetireControlUnit::hasWorkToComplete() is responsible for checking if there + // are still instructions in-flight in the out-of-order backend. + bool hasWorkToComplete() const override { return false; } + bool isAvailable(const InstRef &IR) const override; + + // Notifies the scheduler that a new cycle just started. + // + // This method notifies the scheduler that a new cycle started. + // This method is also responsible for notifying listeners about instructions + // state changes, and processor resources freed by the scheduler. 
+ // Instructions that transitioned to the 'Executed' state are automatically + // moved to the next stage (i.e. RetireStage). + llvm::Error cycleStart() override; + llvm::Error execute(InstRef &IR) override; + + void + notifyInstructionIssued(const InstRef &IR, + llvm::ArrayRef> Used); + void notifyInstructionExecuted(const InstRef &IR); + void notifyInstructionReady(const InstRef &IR); + void notifyResourceAvailable(const ResourceRef &RR); + + // Notify listeners that buffered resources were consumed. + void notifyReservedBuffers(llvm::ArrayRef Buffers); + + // Notify listeners that buffered resources were freed. + void notifyReleasedBuffers(llvm::ArrayRef Buffers); +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/FetchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/FetchStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/FetchStage.h @@ -0,0 +1,52 @@ +//===---------------------- FetchStage.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Fetch stage of an instruction pipeline. Its sole +/// purpose in life is to produce instructions for the rest of the pipeline. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H + +#include "InstrBuilder.h" +#include "SourceMgr.h" +#include "Stages/Stage.h" +#include + +namespace mca { + +class FetchStage final : public Stage { + std::unique_ptr CurrentInstruction; + using InstMap = std::map>; + InstMap Instructions; + InstrBuilder &IB; + SourceMgr &SM; + + // Updates the program counter, and sets 'CurrentInstruction'. + llvm::Error getNextInstruction(); + + FetchStage(const FetchStage &Other) = delete; + FetchStage &operator=(const FetchStage &Other) = delete; + +public: + FetchStage(InstrBuilder &IB, SourceMgr &SM) + : CurrentInstruction(), IB(IB), SM(SM) {} + + bool isAvailable(const InstRef &IR) const override; + bool hasWorkToComplete() const override; + llvm::Error execute(InstRef &IR) override; + llvm::Error cycleStart() override; + llvm::Error cycleEnd() override; +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h +++ llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h @@ -0,0 +1,42 @@ +//===--------------------- InstructionTables.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a custom stage to generate instruction tables. 
+/// See the description of command-line flag -instruction-tables in
+/// docs/CommandGuide/llvm-mca.rst
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
+
+#include "HardwareUnits/Scheduler.h"
+#include "InstrBuilder.h"
+#include "Stages/Stage.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+
+namespace mca {
+
+class InstructionTables final : public Stage {
+  const llvm::MCSchedModel &SM;
+  InstrBuilder &IB;
+  llvm::SmallVector, 4> UsedResources;
+
+public:
+  InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder)
+      : Stage(), SM(Model), IB(Builder) {}
+
+  bool hasWorkToComplete() const override { return false; }
+  llvm::Error execute(InstRef &IR) override;
+};
+} // namespace mca
+
+#endif
Index: llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h
===================================================================
--- llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h
+++ llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h
@@ -0,0 +1,46 @@
+//===---------------------- RetireStage.h -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the retire stage of a default instruction pipeline.
+/// The RetireStage represents the process logic that interacts with the
+/// simulated RetireControlUnit hardware.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H + +#include "HardwareUnits/RegisterFile.h" +#include "HardwareUnits/RetireControlUnit.h" +#include "Stages/Stage.h" + +namespace mca { + +class RetireStage final : public Stage { + // Owner will go away when we move listeners/eventing to the stages. + RetireControlUnit &RCU; + RegisterFile &PRF; + + RetireStage(const RetireStage &Other) = delete; + RetireStage &operator=(const RetireStage &Other) = delete; + +public: + RetireStage(RetireControlUnit &R, RegisterFile &F) + : Stage(), RCU(R), PRF(F) {} + + bool hasWorkToComplete() const override { return !RCU.isEmpty(); } + llvm::Error cycleStart() override; + llvm::Error execute(InstRef &IR) override; + void notifyInstructionRetired(const InstRef &IR); +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/Stage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/Stage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/Stage.h @@ -0,0 +1,86 @@ +//===---------------------- Stage.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage. +/// A chain of stages compose an instruction pipeline. 
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_STAGE_H
+#define LLVM_TOOLS_LLVM_MCA_STAGE_H
+
+#include "HWEventListener.h"
+#include "llvm/Support/Error.h"
+#include <set>
+
+namespace mca {
+
+class InstRef;
+
+class Stage {
+  Stage *NextInSequence;
+  std::set<HWEventListener *> Listeners;
+
+  Stage(const Stage &Other) = delete;
+  Stage &operator=(const Stage &Other) = delete;
+
+protected:
+  const std::set<HWEventListener *> &getListeners() const { return Listeners; }
+
+public:
+  Stage() : NextInSequence(nullptr) {}
+  virtual ~Stage();
+
+  /// Returns true if it can execute IR during this cycle.
+  virtual bool isAvailable(const InstRef &IR) const { return true; }
+
+  /// Returns true if some instructions are still executing this stage.
+  virtual bool hasWorkToComplete() const = 0;
+
+  /// Called once at the start of each cycle. This can be used as a setup
+  /// phase to prepare for the executions during the cycle.
+  virtual llvm::Error cycleStart() { return llvm::ErrorSuccess(); }
+
+  /// Called once at the end of each cycle.
+  virtual llvm::Error cycleEnd() { return llvm::ErrorSuccess(); }
+
+  /// The primary action that this stage performs on instruction IR.
+  virtual llvm::Error execute(InstRef &IR) = 0;
+
+  void setNextInSequence(Stage *NextStage) {
+    assert(!NextInSequence && "This stage already has a NextInSequence!");
+    NextInSequence = NextStage;
+  }
+
+  bool checkNextStage(const InstRef &IR) const {
+    return NextInSequence && NextInSequence->isAvailable(IR);
+  }
+
+  /// Called when an instruction is ready to move to the next pipeline stage.
+  ///
+  /// Stages are responsible for moving instructions to their immediate
+  /// successor stages.
+  llvm::Error moveToTheNextStage(InstRef &IR) {
+    assert(checkNextStage(IR) && "Next stage is not ready!");
+    return NextInSequence->execute(IR);
+  }
+
+  /// Add a listener to receive callbacks during the execution of this stage.
+ void addListener(HWEventListener *Listener); + + /// Notify listeners of a particular hardware event. + template void notifyEvent(const EventT &Event) const { + for (HWEventListener *Listener : Listeners) + Listener->onEvent(Event); + } +}; + +} // namespace mca +#endif // LLVM_TOOLS_LLVM_MCA_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Support.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Support.h +++ llvm/trunk/tools/llvm-mca/include/Support.h @@ -0,0 +1,58 @@ +//===--------------------- Support.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Helper functions used by various pipeline components. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H +#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" + +namespace mca { + +/// Populates vector Masks with processor resource masks. +/// +/// The number of bits set in a mask depends on the processor resource type. +/// Each processor resource mask has at least one bit set. For groups, the +/// number of bits set in the mask is equal to the cardinality of the group plus +/// one. Excluding the most significant bit, the remaining bits in the mask +/// identify processor resources that are part of the group. +/// +/// Example: +/// +/// ResourceA -- Mask: 0b001 +/// ResourceB -- Mask: 0b010 +/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111 +/// +/// ResourceAB is a processor resource group containing ResourceA and ResourceB. 
+/// Each resource mask uniquely identifies a resource; both ResourceA and +/// ResourceB only have one bit set. +/// ResourceAB is a group; excluding the most significant bit in the mask, the +/// remaining bits identify the composition of the group. +/// +/// Resource masks are used by the ResourceManager to solve set membership +/// problems with simple bit manipulation operations. +void computeProcResourceMasks(const llvm::MCSchedModel &SM, + llvm::SmallVectorImpl &Masks); + +/// Compute the reciprocal block throughput from a set of processor resource +/// cycles. The reciprocal block throughput is computed as the MAX between: +/// - NumMicroOps / DispatchWidth +/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource). +double computeBlockRThroughput(const llvm::MCSchedModel &SM, + unsigned DispatchWidth, unsigned NumMicroOps, + llvm::ArrayRef ProcResourceUsage); +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt +++ llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt @@ -0,0 +1,33 @@ +include_directories(${LLVM_MCA_SOURCE_DIR}/include) + +add_library(LLVMMCA + STATIC + Context.cpp + HWEventListener.cpp + HardwareUnits/HardwareUnit.cpp + HardwareUnits/LSUnit.cpp + HardwareUnits/RegisterFile.cpp + HardwareUnits/ResourceManager.cpp + HardwareUnits/RetireControlUnit.cpp + HardwareUnits/Scheduler.cpp + InstrBuilder.cpp + Instruction.cpp + Pipeline.cpp + Stages/DispatchStage.cpp + Stages/ExecuteStage.cpp + Stages/FetchStage.cpp + Stages/InstructionTables.cpp + Stages/RetireStage.cpp + Stages/Stage.cpp + Support.cpp + ) + +llvm_update_compile_flags(LLVMMCA) +llvm_map_components_to_libnames(libs + CodeGen + MC + Support + ) + +target_link_libraries(LLVMMCA ${libs}) +set_target_properties(LLVMMCA PROPERTIES FOLDER "Libraries") Index: llvm/trunk/tools/llvm-mca/lib/Context.cpp 
=================================================================== --- llvm/trunk/tools/llvm-mca/lib/Context.cpp +++ llvm/trunk/tools/llvm-mca/lib/Context.cpp @@ -0,0 +1,65 @@ +//===---------------------------- Context.cpp -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. +/// +//===----------------------------------------------------------------------===// + +#include "Context.h" +#include "HardwareUnits/RegisterFile.h" +#include "HardwareUnits/RetireControlUnit.h" +#include "HardwareUnits/Scheduler.h" +#include "Stages/DispatchStage.h" +#include "Stages/ExecuteStage.h" +#include "Stages/FetchStage.h" +#include "Stages/RetireStage.h" + +namespace mca { + +using namespace llvm; + +std::unique_ptr +Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, + SourceMgr &SrcMgr) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Create the hardware units defining the backend. + auto RCU = llvm::make_unique(SM); + auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); + auto LSU = llvm::make_unique(Opts.LoadQueueSize, Opts.StoreQueueSize, + Opts.AssumeNoAlias); + auto HWS = llvm::make_unique(SM, LSU.get()); + + // Create the pipeline and its stages. 
+ auto StagePipeline = llvm::make_unique(); + auto Fetch = llvm::make_unique(IB, SrcMgr); + auto Dispatch = llvm::make_unique( + STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF); + auto Execute = llvm::make_unique(*HWS); + auto Retire = llvm::make_unique(*RCU, *PRF); + + // Pass the ownership of all the hardware units to this Context. + addHardwareUnit(std::move(RCU)); + addHardwareUnit(std::move(PRF)); + addHardwareUnit(std::move(LSU)); + addHardwareUnit(std::move(HWS)); + + // Build the pipeline. + StagePipeline->appendStage(std::move(Fetch)); + StagePipeline->appendStage(std::move(Dispatch)); + StagePipeline->appendStage(std::move(Execute)); + StagePipeline->appendStage(std::move(Retire)); + return StagePipeline; +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp +++ llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp @@ -0,0 +1,21 @@ +//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a vtable anchor for class HWEventListener. +/// +//===----------------------------------------------------------------------===// + +#include "HWEventListener.h" + +namespace mca { + +// Anchor the vtable here. 
+void HWEventListener::anchor() {} +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp @@ -0,0 +1,23 @@ +//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the anchor for the base class that describes +/// simulated hardware units. +/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/HardwareUnit.h" + +namespace mca { + +// Pin the vtable with this method. +HardwareUnit::~HardwareUnit() = default; + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp @@ -0,0 +1,156 @@ +//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load-Store Unit for the llvm-mca tool. 
+/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/LSUnit.h" +#include "Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +#ifndef NDEBUG +void LSUnit::dump() const { + dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; + dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; + dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n'; + dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; +} +#endif + +void LSUnit::assignLQSlot(unsigned Index) { + assert(!isLQFull()); + assert(LoadQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot \n"); + LoadQueue.insert(Index); +} + +void LSUnit::assignSQSlot(unsigned Index) { + assert(!isSQFull()); + assert(StoreQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot \n"); + StoreQueue.insert(Index); +} + +void LSUnit::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned IsMemBarrier = Desc.HasSideEffects; + assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); + + const unsigned Index = IR.getSourceIndex(); + if (Desc.MayLoad) { + if (IsMemBarrier) + LoadBarriers.insert(Index); + assignLQSlot(Index); + } + + if (Desc.MayStore) { + if (IsMemBarrier) + StoreBarriers.insert(Index); + assignSQSlot(Index); + } +} + +LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.MayLoad && isLQFull()) + return LSUnit::LSU_LQUEUE_FULL; + if (Desc.MayStore && isSQFull()) + return LSUnit::LSU_SQUEUE_FULL; + return LSUnit::LSU_AVAILABLE; +} + +bool LSUnit::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + const unsigned Index = IR.getSourceIndex(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + 
assert((IsALoad || IsAStore) && "Not a memory operation!");
+  assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
+  assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
+
+  if (IsALoad && !LoadBarriers.empty()) {
+    unsigned LoadBarrierIndex = *LoadBarriers.begin();
+    if (Index > LoadBarrierIndex)
+      return false;
+    if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
+      return false;
+  }
+
+  if (IsAStore && !StoreBarriers.empty()) {
+    unsigned StoreBarrierIndex = *StoreBarriers.begin();
+    if (Index > StoreBarrierIndex)
+      return false;
+    if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
+      return false;
+  }
+
+  if (NoAlias && IsALoad)
+    return true;
+
+  if (StoreQueue.size()) {
+    // Check if this memory operation is younger than the oldest store.
+    if (Index > *StoreQueue.begin())
+      return false;
+  }
+
+  // Okay, we are older than the oldest store in the queue.
+  // If there are no pending loads, then we can say for sure that this
+  // instruction is ready.
+  if (isLQEmpty())
+    return true;
+
+  // Check if there are no older loads.
+  if (Index <= *LoadQueue.begin())
+    return true;
+
+  // There is at least one younger load.
+ return !IsAStore; +} + +void LSUnit::onInstructionExecuted(const InstRef &IR) { + const unsigned Index = IR.getSourceIndex(); + std::set::iterator it = LoadQueue.find(Index); + if (it != LoadQueue.end()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the load queue.\n"); + LoadQueue.erase(it); + } + + it = StoreQueue.find(Index); + if (it != StoreQueue.end()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the store queue.\n"); + StoreQueue.erase(it); + } + + if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of store barriers.\n"); + StoreBarriers.erase(StoreBarriers.begin()); + } + if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of load barriers.\n"); + LoadBarriers.erase(LoadBarriers.begin()); + } +} +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp @@ -0,0 +1,350 @@ +//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. 
+/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/RegisterFile.h" +#include "Instruction.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +RegisterFile::RegisterFile(const llvm::MCSchedModel &SM, + const llvm::MCRegisterInfo &mri, unsigned NumRegs) + : MRI(mri), RegisterMappings(mri.getNumRegs(), + {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) { + initialize(SM, NumRegs); +} + +void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { + // Create a default register file that "sees" all the machine registers + // declared by the target. The number of physical registers in the default + // register file is set equal to `NumRegs`. A value of zero for `NumRegs` + // means: this register file has an unbounded number of physical registers. + addRegisterFile({} /* all registers */, NumRegs); + if (!SM.hasExtraProcessorInfo()) + return; + + // For each user defined register file, allocate a RegisterMappingTracker + // object. The size of every register file, as well as the mapping between + // register files and register classes is specified via tablegen. + const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); + for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) { + const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; + // Skip invalid register files with zero physical registers. + unsigned Length = RF.NumRegisterCostEntries; + if (!RF.NumPhysRegs) + continue; + // The cost of a register definition is equivalent to the number of + // physical registers that are allocated at register renaming stage. + const MCRegisterCostEntry *FirstElt = + &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; + addRegisterFile(ArrayRef(FirstElt, Length), + RF.NumPhysRegs); + } +} + +void RegisterFile::addRegisterFile(ArrayRef Entries, + unsigned NumPhysRegs) { + // A default register file is always allocated at index #0. 
That register file + // is mainly used to count the total number of mappings created by all + // register files at runtime. Users can limit the number of available physical + // registers in register file #0 through the command line flag + // `-register-file-size`. + unsigned RegisterFileIndex = RegisterFiles.size(); + RegisterFiles.emplace_back(NumPhysRegs); + + // Special case where there is no register class identifier in the set. + // An empty set of register classes means: this register file contains all + // the physical registers specified by the target. + // We optimistically assume that a register can be renamed at the cost of a + // single physical register. The constructor of RegisterFile ensures that + // a RegisterMapping exists for each logical register defined by the Target. + if (Entries.empty()) + return; + + // Now update the cost of individual registers. + for (const MCRegisterCostEntry &RCE : Entries) { + const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); + for (const MCPhysReg Reg : RC) { + RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; + IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; + if (IPC.first && IPC.first != RegisterFileIndex) { + // The only register file that is allowed to overlap is the default + // register file at index #0. The analysis is inaccurate if register + // files overlap. + errs() << "warning: register " << MRI.getName(Reg) + << " defined in multiple register files."; + } + IPC = std::make_pair(RegisterFileIndex, RCE.Cost); + Entry.RenameAs = Reg; + + // Assume the same cost for each sub-register. 
+ for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { + RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; + if (!OtherEntry.IndexPlusCost.first && + (!OtherEntry.RenameAs || + MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { + OtherEntry.IndexPlusCost = IPC; + OtherEntry.RenameAs = Reg; + } + } + } + } +} + +void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef UsedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs += Cost; + UsedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs += Cost; + UsedPhysRegs[0] += Cost; +} + +void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef FreedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs -= Cost; + FreedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs -= Cost; + FreedPhysRegs[0] += Cost; +} + +void RegisterFile::addRegisterWrite(WriteRef Write, + MutableArrayRef UsedPhysRegs, + bool ShouldAllocatePhysRegs) { + WriteState &WS = *Write.getWriteState(); + unsigned RegID = WS.getRegisterID(); + assert(RegID && "Adding an invalid register definition?"); + + LLVM_DEBUG({ + dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() + << ", " << MRI.getName(RegID) << "]\n"; + }); + + // If RenameAs is equal to RegID, then RegID is subject to register renaming + // and false dependencies on RegID are all eliminated. 
+ + // If RenameAs references the invalid register, then we optimistically assume + // that it can be renamed. In the absence of tablegen descriptors for register + // files, RenameAs is always set to the invalid register ID. In all other + // cases, RenameAs must be either equal to RegID, or it must reference a + // super-register of RegID. + + // If RenameAs is a super-register of RegID, then a write to RegID has always + // a false dependency on RenameAs. The only exception is for when the write + // implicitly clears the upper portion of the underlying register. + // If a write clears its super-registers, then it is renamed as `RenameAs`. + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + if (RRI.RenameAs && RRI.RenameAs != RegID) { + RegID = RRI.RenameAs; + WriteRef &OtherWrite = RegisterMappings[RegID].first; + + if (!WS.clearsSuperRegisters()) { + // The processor keeps the definition of `RegID` together with register + // `RenameAs`. Since this partial write is not renamed, no physical + // register is allocated. + ShouldAllocatePhysRegs = false; + + if (OtherWrite.getWriteState() && + (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { + // This partial write has a false dependency on RenameAs. + WS.setDependentWrite(OtherWrite.getWriteState()); + } + } + } + + // Update the mapping for register RegID including its sub-registers. + RegisterMappings[RegID].first = Write; + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) + RegisterMappings[*I].first = Write; + + // No physical registers are allocated for instructions that are optimized in + // hardware. For example, zero-latency data-dependency breaking instructions + // don't consume physical registers. 
+ if (ShouldAllocatePhysRegs) + allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) + RegisterMappings[*I].first = Write; +} + +void RegisterFile::removeRegisterWrite(const WriteState &WS, + MutableArrayRef FreedPhysRegs, + bool ShouldFreePhysRegs) { + unsigned RegID = WS.getRegisterID(); + + assert(RegID != 0 && "Invalidating an already invalid register?"); + assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && + "Invalidating a write of unknown cycles!"); + assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); + + unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + if (RenameAs && RenameAs != RegID) { + RegID = RenameAs; + + if (!WS.clearsSuperRegisters()) { + // Keep the definition of `RegID` together with register `RenameAs`. + ShouldFreePhysRegs = false; + } + } + + if (ShouldFreePhysRegs) + freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); + + WriteRef &WR = RegisterMappings[RegID].first; + if (WR.getWriteState() == &WS) + WR.invalidate(); + + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } +} + +void RegisterFile::collectWrites(SmallVectorImpl &Writes, + unsigned RegID) const { + assert(RegID && RegID < RegisterMappings.size()); + LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " + << MRI.getName(RegID) << '\n'); + const WriteRef &WR = RegisterMappings[RegID].first; + if (WR.isValid()) + Writes.push_back(WR); + + // Handle potential partial register updates. 
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + const WriteRef &WR = RegisterMappings[*I].first; + if (WR.isValid()) + Writes.push_back(WR); + } + + // Remove duplicate entries and resize the input vector. + llvm::sort(Writes.begin(), Writes.end(), + [](const WriteRef &Lhs, const WriteRef &Rhs) { + return Lhs.getWriteState() < Rhs.getWriteState(); + }); + auto It = std::unique(Writes.begin(), Writes.end()); + Writes.resize(std::distance(Writes.begin(), It)); + + LLVM_DEBUG({ + for (const WriteRef &WR : Writes) { + const WriteState &WS = *WR.getWriteState(); + dbgs() << "[PRF] Found a dependent use of Register " + << MRI.getName(WS.getRegisterID()) << " (defined by intruction #" + << WR.getSourceIndex() << ")\n"; + } + }); +} + +unsigned RegisterFile::isAvailable(ArrayRef Regs) const { + SmallVector NumPhysRegs(getNumRegisterFiles()); + + // Find how many new mappings must be created for each register file. + for (const unsigned RegID : Regs) { + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; + if (Entry.first) + NumPhysRegs[Entry.first] += Entry.second; + NumPhysRegs[0] += Entry.second; + } + + unsigned Response = 0; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + unsigned NumRegs = NumPhysRegs[I]; + if (!NumRegs) + continue; + + const RegisterMappingTracker &RMT = RegisterFiles[I]; + if (!RMT.NumPhysRegs) { + // The register file has an unbounded number of microarchitectural + // registers. + continue; + } + + if (RMT.NumPhysRegs < NumRegs) { + // The current register file is too small. This may occur if the number of + // microarchitectural registers in register file #0 was changed by the + // users via flag -reg-file-size. Alternatively, the scheduling model + // specified a too small number of registers for this register file. 
+ LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); + + // FIXME: Normalize the instruction register count to match the + // NumPhysRegs value. This is a highly unusual case, and is not expected + // to occur. This normalization is hiding an inconsistency in either the + // scheduling model or in the value that the user might have specified + // for NumPhysRegs. + NumRegs = RMT.NumPhysRegs; + } + + if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) + Response |= (1U << I); + } + + return Response; +} + +#ifndef NDEBUG +void RegisterFile::dump() const { + for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { + const RegisterMapping &RM = RegisterMappings[I]; + if (!RM.first.getWriteState()) + continue; + const RegisterRenamingInfo &RRI = RM.second; + dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first + << ", Cost=" << RRI.IndexPlusCost.second + << ", RenameAs=" << RRI.RenameAs << ", "; + RM.first.dump(); + dbgs() << '\n'; + } + + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + dbgs() << "Register File #" << I; + const RegisterMappingTracker &RMT = RegisterFiles[I]; + dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs + << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; + } +} +#endif + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp @@ -0,0 +1,309 @@ +//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/ResourceManager.h" +#include "Support.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" +ResourceStrategy::~ResourceStrategy() = default; + +void DefaultResourceStrategy::skipMask(uint64_t Mask) { + NextInSequenceMask &= (~Mask); + if (!NextInSequenceMask) { + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; + } +} + +uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { + // This method assumes that ReadyMask cannot be zero. + uint64_t CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); + while (!(ReadyMask & CandidateMask)) { + skipMask(CandidateMask); + CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); + } + return CandidateMask; +} + +void DefaultResourceStrategy::used(uint64_t Mask) { + if (Mask > NextInSequenceMask) { + RemovedFromNextInSequence |= Mask; + return; + } + skipMask(Mask); +} + +ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, + uint64_t Mask) + : ProcResourceDescIndex(Index), ResourceMask(Mask), + BufferSize(Desc.BufferSize) { + if (llvm::countPopulation(ResourceMask) > 1) + ResourceSizeMask = ResourceMask ^ llvm::PowerOf2Floor(ResourceMask); + else + ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; + ReadyMask = ResourceSizeMask; + AvailableSlots = BufferSize == -1 ? 
0U : static_cast<unsigned>(BufferSize);
+  Unavailable = false;
+}
+
+bool ResourceState::isReady(unsigned NumUnits) const {
+  return (!isReserved() || isADispatchHazard()) &&
+         llvm::countPopulation(ReadyMask) >= NumUnits;
+}
+
+ResourceStateEvent ResourceState::isBufferAvailable() const {
+  if (isADispatchHazard() && isReserved())
+    return RS_RESERVED;
+  if (!isBuffered() || AvailableSlots)
+    return RS_BUFFER_AVAILABLE;
+  return RS_BUFFER_UNAVAILABLE;
+}
+
+#ifndef NDEBUG
+void ResourceState::dump() const {
+  dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask
+         << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize
+         << ", AvailableSlots=" << AvailableSlots
+         << ", Reserved=" << Unavailable << '\n';
+}
+#endif
+
+static unsigned getResourceStateIndex(uint64_t Mask) {
+  return std::numeric_limits<uint64_t>::digits - llvm::countLeadingZeros(Mask);
+}
+
+static std::unique_ptr<ResourceStrategy>
+getStrategyFor(const ResourceState &RS) {
+  if (RS.isAResourceGroup() || RS.getNumUnits() > 1)
+    return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
+  return std::unique_ptr<ResourceStrategy>(nullptr);
+}
+
+ResourceManager::ResourceManager(const MCSchedModel &SM)
+    : ProcResID2Mask(SM.getNumProcResourceKinds()) {
+  computeProcResourceMasks(SM, ProcResID2Mask);
+  Resources.resize(SM.getNumProcResourceKinds());
+  Strategies.resize(SM.getNumProcResourceKinds());
+
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    uint64_t Mask = ProcResID2Mask[I];
+    unsigned Index = getResourceStateIndex(Mask);
+    Resources[Index] =
+        llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
+    Strategies[Index] = getStrategyFor(*Resources[Index]);
+  }
+}
+
+void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+                                            uint64_t ResourceMask) {
+  unsigned Index = getResourceStateIndex(ResourceMask);
+  assert(Index < Resources.size() && "Invalid processor resource index!");
+  assert(S && "Unexpected null strategy in input!");
+  Strategies[Index] = std::move(S);
+}
+
+unsigned
ResourceManager::resolveResourceMask(uint64_t Mask) const { + return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); +} + +unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { + return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); +} + +// Returns the actual resource consumed by this Use. +// First, is the primary resource ID. +// Second, is the specific sub-resource ID. +ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { + unsigned Index = getResourceStateIndex(ResourceID); + ResourceState &RS = *Resources[Index]; + assert(RS.isReady() && "No available units to select!"); + + // Special case where RS is not a group, and it only declares a single + // resource unit. + if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) + return std::make_pair(ResourceID, RS.getReadyMask()); + + uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); + if (RS.isAResourceGroup()) + return selectPipe(SubResourceID); + return std::make_pair(ResourceID, SubResourceID); +} + +void ResourceManager::use(const ResourceRef &RR) { + // Mark the sub-resource referenced by RR as used. + ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; + RS.markSubResourceAsUsed(RR.second); + // If there are still available units in RR.first, + // then we are done. + if (RS.isReady()) + return; + + // Notify to other resources that RR.first is no longer available. 
+ for (std::unique_ptr &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) { + unsigned Index = getResourceStateIndex(Current.getResourceMask()); + Current.markSubResourceAsUsed(RR.first); + Strategies[Index]->used(RR.first); + } + } +} + +void ResourceManager::release(const ResourceRef &RR) { + ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; + bool WasFullyUsed = !RS.isReady(); + RS.releaseSubResource(RR.second); + if (!WasFullyUsed) + return; + + for (std::unique_ptr &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) + Current.releaseSubResource(RR.first); + } +} + +ResourceStateEvent +ResourceManager::canBeDispatched(ArrayRef Buffers) const { + ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; + for (uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + Result = RS.isBufferAvailable(); + if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) + break; + } + return Result; +} + +void ResourceManager::reserveBuffers(ArrayRef Buffers) { + for (const uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); + RS.reserveBuffer(); + + if (RS.isADispatchHazard()) { + assert(!RS.isReserved()); + RS.setReserved(); + } + } +} + +void ResourceManager::releaseBuffers(ArrayRef Buffers) { + for (const uint64_t R : Buffers) + Resources[getResourceStateIndex(R)]->releaseBuffer(); +} + +bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { + return std::all_of(Desc.Resources.begin(), Desc.Resources.end(), + [&](const std::pair &E) { + unsigned NumUnits = + E.second.isReserved() ? 
0U : E.second.NumUnits; + unsigned Index = getResourceStateIndex(E.first); + return Resources[Index]->isReady(NumUnits); + }); +} + +// Returns true if all resources are in-order, and there is at least one +// resource which is a dispatch hazard (BufferSize = 0). +bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { + if (!canBeIssued(Desc)) + return false; + bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { + unsigned Index = getResourceStateIndex(BufferMask); + const ResourceState &Resource = *Resources[Index]; + return Resource.isInOrder() || Resource.isADispatchHazard(); + }); + if (!AllInOrderResources) + return false; + + return any_of(Desc.Buffers, [&](uint64_t BufferMask) { + return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); + }); +} + +void ResourceManager::issueInstruction( + const InstrDesc &Desc, + SmallVectorImpl> &Pipes) { + for (const std::pair &R : Desc.Resources) { + const CycleSegment &CS = R.second.CS; + if (!CS.size()) { + releaseResource(R.first); + continue; + } + + assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); + if (!R.second.isReserved()) { + ResourceRef Pipe = selectPipe(R.first); + use(Pipe); + BusyResources[Pipe] += CS.size(); + // Replace the resource mask with a valid processor resource index. + const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; + Pipe.first = RS.getProcResourceID(); + Pipes.emplace_back( + std::pair(Pipe, static_cast(CS.size()))); + } else { + assert((countPopulation(R.first) > 1) && "Expected a group!"); + // Mark this group as reserved. + assert(R.second.isReserved()); + reserveResource(R.first); + BusyResources[ResourceRef(R.first, R.first)] += CS.size(); + } + } +} + +void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { + for (std::pair &BR : BusyResources) { + if (BR.second) + BR.second--; + if (!BR.second) { + // Release this resource. 
+ const ResourceRef &RR = BR.first; + + if (countPopulation(RR.first) == 1) + release(RR); + + releaseResource(RR.first); + ResourcesFreed.push_back(RR); + } + } + + for (const ResourceRef &RF : ResourcesFreed) + BusyResources.erase(RF); +} + +void ResourceManager::reserveResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + assert(!Resource.isReserved()); + Resource.setReserved(); +} + +void ResourceManager::releaseResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + Resource.clearReserved(); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp @@ -0,0 +1,87 @@ +//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. +/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/RetireControlUnit.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), + AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + // Check if the scheduling model provides extra information about the machine + // processor. 
If so, then use that information to set the reorder buffer size + // and the maximum number of instructions retired per cycle. + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + AvailableSlots = EPI.ReorderBufferSize; + MaxRetirePerCycle = EPI.MaxRetirePerCycle; + } + + assert(AvailableSlots && "Invalid reorder buffer size!"); + Queue.resize(AvailableSlots); +} + +// Reserves a number of slots, and returns a new token. +unsigned RetireControlUnit::reserveSlot(const InstRef &IR, + unsigned NumMicroOps) { + assert(isAvailable(NumMicroOps)); + unsigned NormalizedQuantity = + std::min(NumMicroOps, static_cast(Queue.size())); + // Zero latency instructions may have zero mOps. Artificially bump this + // value to 1. Although zero latency instructions don't consume scheduler + // resources, they still consume one slot in the retire queue. + NormalizedQuantity = std::max(NormalizedQuantity, 1U); + unsigned TokenID = NextAvailableSlotIdx; + Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; + NextAvailableSlotIdx += NormalizedQuantity; + NextAvailableSlotIdx %= Queue.size(); + AvailableSlots -= NormalizedQuantity; + return TokenID; +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { + return Queue[CurrentInstructionSlotIdx]; +} + +void RetireControlUnit::consumeCurrentToken() { + const RetireControlUnit::RUToken &Current = peekCurrentToken(); + assert(Current.NumSlots && "Reserved zero slots?"); + assert(Current.IR.isValid() && "Invalid RUToken in the RCU queue."); + + // Update the slot index to be the next item in the circular queue. 
+ CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx %= Queue.size(); + AvailableSlots += Current.NumSlots; +} + +void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { + assert(Queue.size() > TokenID); + assert(Queue[TokenID].Executed == false && Queue[TokenID].IR.isValid()); + Queue[TokenID].Executed = true; +} + +#ifndef NDEBUG +void RetireControlUnit::dump() const { + dbgs() << "Retire Unit: { Total Slots=" << Queue.size() + << ", Available Slots=" << AvailableSlots << " }\n"; +} +#endif + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp @@ -0,0 +1,244 @@ +//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A scheduler for processor resource units and processor resource groups. +// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +void Scheduler::initializeStrategy(std::unique_ptr S) { + // Ensure we have a valid (non-null) strategy object. + Strategy = S ? std::move(S) : llvm::make_unique(); +} + +// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
+SchedulerStrategy::~SchedulerStrategy() = default; +DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; + +#ifndef NDEBUG +void Scheduler::dump() const { + dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; + dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; + dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; + Resources->dump(); +} +#endif + +Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + + switch (Resources->canBeDispatched(Desc.Buffers)) { + case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: + return Scheduler::SC_BUFFERS_FULL; + case ResourceStateEvent::RS_RESERVED: + return Scheduler::SC_DISPATCH_GROUP_STALL; + case ResourceStateEvent::RS_BUFFER_AVAILABLE: + break; + } + + // Give lower priority to LSUnit stall events. + switch (LSU->isAvailable(IR)) { + case LSUnit::LSU_LQUEUE_FULL: + return Scheduler::SC_LOAD_QUEUE_FULL; + case LSUnit::LSU_SQUEUE_FULL: + return Scheduler::SC_STORE_QUEUE_FULL; + case LSUnit::LSU_AVAILABLE: + return Scheduler::SC_AVAILABLE; + } + + llvm_unreachable("Don't know how to process this LSU state result!"); +} + +void Scheduler::issueInstructionImpl( + InstRef &IR, + SmallVectorImpl> &UsedResources) { + Instruction *IS = IR.getInstruction(); + const InstrDesc &D = IS->getDesc(); + + // Issue the instruction and collect all the consumed resources + // into a vector. That vector is then used to notify the listener. + Resources->issueInstruction(D, UsedResources); + + // Notify the instruction that it started executing. + // This updates the internal state of each write. + IS->execute(); + + if (IS->isExecuting()) + IssuedSet.emplace_back(IR); + else if (IS->isExecuted()) + LSU->onInstructionExecuted(IR); +} + +// Release the buffered resources and issue the instruction. 
+void Scheduler::issueInstruction( + InstRef &IR, SmallVectorImpl> &UsedResources, + SmallVectorImpl &ReadyInstructions) { + const Instruction &Inst = *IR.getInstruction(); + bool HasDependentUsers = Inst.hasDependentUsers(); + + Resources->releaseBuffers(Inst.getDesc().Buffers); + issueInstructionImpl(IR, UsedResources); + // Instructions that have been issued during this cycle might have unblocked + // other dependent instructions. Dependent instructions may be issued during + // this same cycle if operands have ReadAdvance entries. Promote those + // instructions to the ReadySet and notify the caller that those are ready. + if (HasDependentUsers) + promoteToReadySet(ReadyInstructions); +} + +void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { + // Scan the set of waiting instructions and promote them to the + // ready queue if operands are all ready. + unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR.isValid()) + break; + + // Check if this instruction is now ready. In case, force + // a transition in state using method 'update()'. + Instruction &IS = *IR.getInstruction(); + if (!IS.isReady()) + IS.update(); + + // Check if there are still unsolved data dependencies. + if (!isReady(IR)) { + ++I; + continue; + } + + Ready.emplace_back(IR); + ReadySet.emplace_back(IR); + + IR.invalidate(); + ++RemovedElements; + std::iter_swap(I, E - RemovedElements); + } + + WaitSet.resize(WaitSet.size() - RemovedElements); +} + +InstRef Scheduler::select() { + unsigned QueueIndex = ReadySet.size(); + for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { + const InstRef &IR = ReadySet[I]; + if (QueueIndex == ReadySet.size() || + Strategy->compare(IR, ReadySet[QueueIndex])) { + const InstrDesc &D = IR.getInstruction()->getDesc(); + if (Resources->canBeIssued(D)) + QueueIndex = I; + } + } + + if (QueueIndex == ReadySet.size()) + return InstRef(); + + // We found an instruction to issue. 
+ InstRef IR = ReadySet[QueueIndex]; + std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); + ReadySet.pop_back(); + return IR; +} + +void Scheduler::updateIssuedSet(SmallVectorImpl &Executed) { + unsigned RemovedElements = 0; + for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR.isValid()) + break; + Instruction &IS = *IR.getInstruction(); + if (!IS.isExecuted()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " is still executing.\n"); + ++I; + continue; + } + + // Instruction IR has completed execution. + LSU->onInstructionExecuted(IR); + Executed.emplace_back(IR); + ++RemovedElements; + IR.invalidate(); + std::iter_swap(I, E - RemovedElements); + } + + IssuedSet.resize(IssuedSet.size() - RemovedElements); +} + +void Scheduler::cycleEvent(SmallVectorImpl &Freed, + SmallVectorImpl &Executed, + SmallVectorImpl &Ready) { + // Release consumed resources. + Resources->cycleEvent(Freed); + + // Propagate the cycle event to the 'Issued' and 'Wait' sets. + for (InstRef &IR : IssuedSet) + IR.getInstruction()->cycleEvent(); + + updateIssuedSet(Executed); + + for (InstRef &IR : WaitSet) + IR.getInstruction()->cycleEvent(); + + promoteToReadySet(Ready); +} + +bool Scheduler::mustIssueImmediately(const InstRef &IR) const { + // Instructions that use an in-order dispatch/issue processor resource must be + // issued immediately to the pipeline(s). Any other in-order buffered + // resources (i.e. BufferSize=1) is consumed. + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); +} + +void Scheduler::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + Resources->reserveBuffers(Desc.Buffers); + + // If necessary, reserve queue entries in the load-store unit (LSU). 
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore; + if (IsMemOp) + LSU->dispatch(IR); + + if (!isReady(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); + WaitSet.push_back(IR); + return; + } + + // Don't add a zero-latency instruction to the Ready queue. + // A zero-latency instruction doesn't consume any scheduler resources. That is + // because it doesn't need to be executed, and it is often removed at register + // renaming stage. For example, register-register moves are often optimized at + // register renaming stage by simply updating register aliases. On some + // targets, zero-idiom instructions (for example: a xor that clears the value + // of a register) are treated specially, and are often eliminated at register + // renaming stage. + if (!mustIssueImmediately(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); + ReadySet.push_back(IR); + } +} + +bool Scheduler::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsMemOp = Desc.MayLoad || Desc.MayStore; + return IR.getInstruction()->isReady() && (!IsMemOp || LSU->isReady(IR)); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp +++ llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp @@ -0,0 +1,485 @@ +//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstrBuilder interface. 
+/// +//===----------------------------------------------------------------------===// + +#include "InstrBuilder.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +using namespace llvm; + +static void initializeUsedResources(InstrDesc &ID, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI, + ArrayRef ProcResourceMasks) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Populate resources consumed. + using ResourcePlusCycles = std::pair; + std::vector Worklist; + + // Track cycles contributed by resources that are in a "Super" relationship. + // This is required if we want to correctly match the behavior of method + // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set + // of "consumed" processor resources and resource cycles, the logic in + // ExpandProcResource() doesn't update the number of resource cycles + // contributed by a "Super" resource to a group. + // We need to take this into account when we find that a processor resource is + // part of a group, and it is also used as the "Super" of other resources. + // This map stores the number of cycles contributed by sub-resources that are + // part of a "Super" resource. The key value is the "Super" resource mask ID. 
+ DenseMap SuperResources; + + for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { + const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; + const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); + uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; + if (PR.BufferSize != -1) + ID.Buffers.push_back(Mask); + CycleSegment RCy(0, PRE->Cycles, false); + Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); + if (PR.SuperIdx) { + uint64_t Super = ProcResourceMasks[PR.SuperIdx]; + SuperResources[Super] += PRE->Cycles; + } + } + + // Sort elements by mask popcount, so that we prioritize resource units over + // resource groups, and smaller groups over larger groups. + llvm::sort(Worklist.begin(), Worklist.end(), + [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { + unsigned popcntA = countPopulation(A.first); + unsigned popcntB = countPopulation(B.first); + if (popcntA < popcntB) + return true; + if (popcntA > popcntB) + return false; + return A.first < B.first; + }); + + uint64_t UsedResourceUnits = 0; + + // Remove cycles contributed by smaller resources. + for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { + ResourcePlusCycles &A = Worklist[I]; + if (!A.second.size()) { + A.second.NumUnits = 0; + A.second.setReserved(); + ID.Resources.emplace_back(A); + continue; + } + + ID.Resources.emplace_back(A); + uint64_t NormalizedMask = A.first; + if (countPopulation(A.first) == 1) { + UsedResourceUnits |= A.first; + } else { + // Remove the leading 1 from the resource group mask. 
+ NormalizedMask ^= PowerOf2Floor(NormalizedMask); + } + + for (unsigned J = I + 1; J < E; ++J) { + ResourcePlusCycles &B = Worklist[J]; + if ((NormalizedMask & B.first) == NormalizedMask) { + B.second.CS.Subtract(A.second.size() - SuperResources[A.first]); + if (countPopulation(B.first) > 1) + B.second.NumUnits++; + } + } + } + + // A SchedWrite may specify a number of cycles in which a resource group + // is reserved. For example (on target x86; cpu Haswell): + // + // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { + // let ResourceCycles = [2, 2, 3]; + // } + // + // This means: + // Resource units HWPort0 and HWPort1 are both used for 2cy. + // Resource group HWPort01 is the union of HWPort0 and HWPort1. + // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 + // will not be usable for 2 entire cycles from instruction issue. + // + // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency + // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an + // extra delay on top of the 2 cycles latency. + // During those extra cycles, HWPort01 is not usable by other instructions. + for (ResourcePlusCycles &RPC : ID.Resources) { + if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { + // Remove the leading 1 from the resource group mask. + uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); + if ((Mask & UsedResourceUnits) == Mask) + RPC.second.setReserved(); + } + } + + LLVM_DEBUG({ + for (const std::pair &R : ID.Resources) + dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; + for (const uint64_t R : ID.Buffers) + dbgs() << "\t\tBuffer Mask=" << R << '\n'; + }); +} + +static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI) { + if (MCDesc.isCall()) { + // We cannot estimate how long this call will take. + // Artificially set an arbitrarily high latency (100cy). 
+ ID.MaxLatency = 100U; + return; + } + + int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // If latency is unknown, then conservatively assume a MaxLatency of 100cy. + ID.MaxLatency = Latency < 0 ? 100U : static_cast(Latency); +} + +Error InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + + // These are for now the (strong) assumptions made by this algorithm: + // * The number of explicit and implicit register definitions in a MCInst + // matches the number of explicit and implicit definitions according to + // the opcode descriptor (MCInstrDesc). + // * Register definitions take precedence over register uses in the operands + // list. + // * If an opcode specifies an optional definition, then the optional + // definition is always the last operand in the sequence, and it can be + // set to zero (i.e. "no register"). + // + // These assumptions work quite well for most out-of-order in-tree targets + // like x86. This is mainly because the vast majority of instructions is + // expanded to MCInst using a straightforward lowering logic that preserves + // the ordering of the operands. + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); + unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; + unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; + if (MCDesc.hasOptionalDef()) + TotalDefs++; + ID.Writes.resize(TotalDefs); + // Iterate over the operands list, and skip non-register operands. + // The first NumExplictDefs register operands are expected to be register + // definitions. 
+ unsigned CurrentDef = 0; + unsigned i = 0; + for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { + const MCOperand &Op = MCI.getOperand(i); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = i; + if (CurrentDef < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + Write.IsOptionalDef = false; + LLVM_DEBUG({ + dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + CurrentDef++; + } + + if (CurrentDef != NumExplicitDefs) { + return make_error( + "error: Expected more register operand definitions.", + inconvertibleErrorCode()); + } + + CurrentDef = 0; + for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { + unsigned Index = NumExplicitDefs + CurrentDef; + WriteDescriptor &Write = ID.Writes[Index]; + Write.OpIndex = ~CurrentDef; + Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; + if (Index < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, Index); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. 
+ Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + + Write.IsOptionalDef = false; + assert(Write.RegisterID != 0 && "Expected a valid phys register!"); + LLVM_DEBUG({ + dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex + << ", PhysReg=" << MRI.getName(Write.RegisterID) + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (MCDesc.hasOptionalDef()) { + // Always assume that the optional definition is the last operand of the + // MCInst sequence. + const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1); + if (i == MCI.getNumOperands() || !Op.isReg()) + return make_error( + "error: expected a register operand for an optional " + "definition. Instruction has not be correctly analyzed.", + inconvertibleErrorCode()); + + WriteDescriptor &Write = ID.Writes[TotalDefs - 1]; + Write.OpIndex = MCI.getNumOperands() - 1; + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = true; + } + + return ErrorSuccess(); +} + +Error InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + + // Skip explicit definitions. + unsigned i = 0; + for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) { + const MCOperand &Op = MCI.getOperand(i); + if (Op.isReg()) + NumExplicitDefs--; + } + + if (NumExplicitDefs) { + return make_error( + "error: Expected more register operand definitions. 
", + inconvertibleErrorCode()); + } + + unsigned NumExplicitUses = MCI.getNumOperands() - i; + unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); + if (MCDesc.hasOptionalDef()) { + assert(NumExplicitUses); + NumExplicitUses--; + } + unsigned TotalUses = NumExplicitUses + NumImplicitUses; + if (!TotalUses) + return ErrorSuccess(); + + ID.Reads.resize(TotalUses); + for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) { + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = i + CurrentUse; + Read.UseIndex = CurrentUse; + Read.SchedClassID = SchedClassID; + LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) { + ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse]; + Read.OpIndex = ~CurrentUse; + Read.UseIndex = NumExplicitUses + CurrentUse; + Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse]; + Read.SchedClassID = SchedClassID; + LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID=" + << MRI.getName(Read.RegisterID) << '\n'); + } + return ErrorSuccess(); +} + +Expected +InstrBuilder::createInstrDescImpl(const MCInst &MCI) { + assert(STI.getSchedModel().hasInstrSchedModel() && + "Itineraries are not yet supported!"); + + // Obtain the instruction descriptor from the opcode. + unsigned short Opcode = MCI.getOpcode(); + const MCInstrDesc &MCDesc = MCII.get(Opcode); + const MCSchedModel &SM = STI.getSchedModel(); + + // Then obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + unsigned CPUID = SM.getProcessorID(); + + // Try to solve variant scheduling classes. 
+ if (SchedClassID) { + while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); + + if (!SchedClassID) { + return make_error("unable to resolve this variant class.", + inconvertibleErrorCode()); + } + } + + // Check if this instruction is supported. Otherwise, report an error. + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { + std::string ToString; + llvm::raw_string_ostream OS(ToString); + WithColor::error() << "found an unsupported instruction in the input" + << " assembly sequence.\n"; + MCIP.printInst(&MCI, OS, "", STI); + OS.flush(); + WithColor::note() << "instruction: " << ToString << '\n'; + return make_error( + "Don't know how to analyze unsupported instructions", + inconvertibleErrorCode()); + } + + // Create a new empty descriptor. + std::unique_ptr ID = llvm::make_unique(); + ID->NumMicroOps = SCDesc.NumMicroOps; + + if (MCDesc.isCall()) { + // We don't correctly model calls. + WithColor::warning() << "found a call in the input assembly sequence.\n"; + WithColor::note() << "call instructions are not correctly modeled. 
" + << "Assume a latency of 100cy.\n"; + } + + if (MCDesc.isReturn()) { + WithColor::warning() << "found a return instruction in the input" + << " assembly sequence.\n"; + WithColor::note() << "program counter updates are ignored.\n"; + } + + ID->MayLoad = MCDesc.mayLoad(); + ID->MayStore = MCDesc.mayStore(); + ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); + + initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); + computeMaxLatency(*ID, MCDesc, SCDesc, STI); + if (auto Err = populateWrites(*ID, MCI, SchedClassID)) + return std::move(Err); + if (auto Err = populateReads(*ID, MCI, SchedClassID)) + return std::move(Err); + + LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); + LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); + + // Now add the new descriptor. + SchedClassID = MCDesc.getSchedClass(); + if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) { + Descriptors[MCI.getOpcode()] = std::move(ID); + return *Descriptors[MCI.getOpcode()]; + } + + VariantDescriptors[&MCI] = std::move(ID); + return *VariantDescriptors[&MCI]; +} + +Expected +InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { + if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) + return *Descriptors[MCI.getOpcode()]; + + if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) + return *VariantDescriptors[&MCI]; + + return createInstrDescImpl(MCI); +} + +Expected> +InstrBuilder::createInstruction(const MCInst &MCI) { + Expected DescOrErr = getOrCreateInstrDesc(MCI); + if (!DescOrErr) + return DescOrErr.takeError(); + const InstrDesc &D = *DescOrErr; + std::unique_ptr NewIS = llvm::make_unique(D); + + // Initialize Reads first. + for (const ReadDescriptor &RD : D.Reads) { + int RegID = -1; + if (!RD.isImplicitRead()) { + // explicit read. + const MCOperand &Op = MCI.getOperand(RD.OpIndex); + // Skip non-register operands. + if (!Op.isReg()) + continue; + RegID = Op.getReg(); + } else { + // Implicit read. 
+ RegID = RD.RegisterID; + } + + // Skip invalid register operands. + if (!RegID) + continue; + + // Okay, this is a register operand. Create a ReadState for it. + assert(RegID > 0 && "Invalid register ID found!"); + NewIS->getUses().emplace_back(llvm::make_unique(RD, RegID)); + } + + // Early exit if there are no writes. + if (D.Writes.empty()) + return std::move(NewIS); + + // Track register writes that implicitly clear the upper portion of the + // underlying super-registers using an APInt. + APInt WriteMask(D.Writes.size(), 0); + + // Now query the MCInstrAnalysis object to obtain information about which + // register writes implicitly clear the upper portion of a super-register. + MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); + + // Check if this is a dependency breaking instruction. + if (MCIA.isDependencyBreaking(STI, MCI)) + NewIS->setDependencyBreaking(); + + // Initialize writes. + unsigned WriteIndex = 0; + for (const WriteDescriptor &WD : D.Writes) { + unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); + // Check if this is a optional definition that references NoReg. + if (WD.IsOptionalDef && !RegID) { + ++WriteIndex; + continue; + } + + assert(RegID && "Expected a valid register ID!"); + NewIS->getDefs().emplace_back(llvm::make_unique( + WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex])); + ++WriteIndex; + } + + return std::move(NewIS); +} +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Instruction.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Instruction.cpp +++ llvm/trunk/tools/llvm-mca/lib/Instruction.cpp @@ -0,0 +1,177 @@ +//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines abstractions used by the Pipeline to model register reads, +// register writes and instructions. +// +//===----------------------------------------------------------------------===// + +#include "Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +using namespace llvm; + +void ReadState::writeStartEvent(unsigned Cycles) { + assert(DependentWrites); + assert(CyclesLeft == UNKNOWN_CYCLES); + + // This read may be dependent on more than one write. This typically occurs + // when a definition is the result of multiple writes where at least one + // write does a partial register update. + // The HW is forced to do some extra bookkeeping to track of all the + // dependent writes, and implement a merging scheme for the partial writes. + --DependentWrites; + TotalCycles = std::max(TotalCycles, Cycles); + + if (!DependentWrites) { + CyclesLeft = TotalCycles; + IsReady = !CyclesLeft; + } +} + +void WriteState::onInstructionIssued() { + assert(CyclesLeft == UNKNOWN_CYCLES); + // Update the number of cycles left based on the WriteDescriptor info. + CyclesLeft = getLatency(); + + // Now that the time left before write-back is known, notify + // all the users. + for (const std::pair &User : Users) { + ReadState *RS = User.first; + unsigned ReadCycles = std::max(0, CyclesLeft - User.second); + RS->writeStartEvent(ReadCycles); + } +} + +void WriteState::addUser(ReadState *User, int ReadAdvance) { + // If CyclesLeft is different than -1, then we don't need to + // update the list of users. We can just notify the user with + // the actual number of cycles left (which may be zero). 
+ if (CyclesLeft != UNKNOWN_CYCLES) { + unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); + User->writeStartEvent(ReadCycles); + return; + } + + std::pair NewPair(User, ReadAdvance); + Users.insert(NewPair); +} + +void WriteState::cycleEvent() { + // Note: CyclesLeft can be a negative number. It is an error to + // make it an unsigned quantity because users of this write may + // specify a negative ReadAdvance. + if (CyclesLeft != UNKNOWN_CYCLES) + CyclesLeft--; +} + +void ReadState::cycleEvent() { + // Update the total number of cycles. + if (DependentWrites && TotalCycles) { + --TotalCycles; + return; + } + + // Bail out immediately if we don't know how many cycles are left. + if (CyclesLeft == UNKNOWN_CYCLES) + return; + + if (CyclesLeft) { + --CyclesLeft; + IsReady = !CyclesLeft; + } +} + +#ifndef NDEBUG +void WriteState::dump() const { + dbgs() << "{ OpIdx=" << WD.OpIndex << ", Lat=" << getLatency() << ", RegID " + << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; +} + +void WriteRef::dump() const { + dbgs() << "IID=" << getSourceIndex() << ' '; + if (isValid()) + getWriteState()->dump(); + else + dbgs() << "(null)"; +} +#endif + +void Instruction::dispatch(unsigned RCUToken) { + assert(Stage == IS_INVALID); + Stage = IS_AVAILABLE; + RCUTokenID = RCUToken; + + // Check if input operands are already available. + update(); +} + +void Instruction::execute() { + assert(Stage == IS_READY); + Stage = IS_EXECUTING; + + // Set the cycles left before the write-back stage. + CyclesLeft = Desc.MaxLatency; + + for (UniqueDef &Def : Defs) + Def->onInstructionIssued(); + + // Transition to the "executed" stage if this is a zero-latency instruction. 
+ if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +void Instruction::update() { + assert(isDispatched() && "Unexpected instruction stage found!"); + + if (!llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); })) + return; + + // A partial register write cannot complete before a dependent write. + auto IsDefReady = [&](const UniqueDef &Def) { + if (const WriteState *Write = Def->getDependentWrite()) { + int WriteLatency = Write->getCyclesLeft(); + if (WriteLatency == UNKNOWN_CYCLES) + return false; + return static_cast(WriteLatency) < Desc.MaxLatency; + } + return true; + }; + + if (llvm::all_of(Defs, IsDefReady)) + Stage = IS_READY; +} + +void Instruction::cycleEvent() { + if (isReady()) + return; + + if (isDispatched()) { + for (UniqueUse &Use : Uses) + Use->cycleEvent(); + + update(); + return; + } + + assert(isExecuting() && "Instruction not in-flight?"); + assert(CyclesLeft && "Instruction already executed?"); + for (UniqueDef &Def : Defs) + Def->cycleEvent(); + CyclesLeft--; + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +const unsigned WriteRef::INVALID_IID = std::numeric_limits::max(); + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt =================================================================== --- llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt +++ llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-mca/lib/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MCA +parent = Libraries +required_libraries = CodeGen MC Support Index: llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp +++ llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp @@ -0,0 +1,97 @@ +//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#include "Pipeline.h" +#include "HWEventListener.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/Debug.h" + +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +using namespace llvm; + +void Pipeline::addEventListener(HWEventListener *Listener) { + if (Listener) + Listeners.insert(Listener); + for (auto &S : Stages) + S->addListener(Listener); +} + +bool Pipeline::hasWorkToProcess() { + return llvm::any_of(Stages, [](const std::unique_ptr &S) { + return S->hasWorkToComplete(); + }); +} + +llvm::Error Pipeline::run() { + assert(!Stages.empty() && "Unexpected empty pipeline found!"); + + while (hasWorkToProcess()) { + notifyCycleBegin(); + if (llvm::Error Err = runCycle()) + return Err; + notifyCycleEnd(); + ++Cycles; + } + return llvm::ErrorSuccess(); +} + +llvm::Error Pipeline::runCycle() { + llvm::Error Err = llvm::ErrorSuccess(); + // Update stages before we start processing new 
instructions. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr &S = *I; + Err = S->cycleStart(); + } + + // Now fetch and execute new instructions. + InstRef IR; + Stage &FirstStage = *Stages[0]; + while (!Err && FirstStage.isAvailable(IR)) + Err = FirstStage.execute(IR); + + // Update stages in preparation for a new cycle. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr &S = *I; + Err = S->cycleEnd(); + } + + return Err; +} + +void Pipeline::appendStage(std::unique_ptr S) { + assert(S && "Invalid null stage in input!"); + if (!Stages.empty()) { + Stage *Last = Stages.back().get(); + Last->setNextInSequence(S.get()); + } + + Stages.push_back(std::move(S)); +} + +void Pipeline::notifyCycleBegin() { + LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); + for (HWEventListener *Listener : Listeners) + Listener->onCycleBegin(); +} + +void Pipeline::notifyCycleEnd() { + LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); + for (HWEventListener *Listener : Listeners) + Listener->onCycleEnd(); +} +} // namespace mca. Index: llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp @@ -0,0 +1,160 @@ +//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. 
+/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. +/// +//===----------------------------------------------------------------------===// + +#include "Stages/DispatchStage.h" +#include "HWEventListener.h" +#include "HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +void DispatchStage::notifyInstructionDispatched(const InstRef &IR, + ArrayRef UsedRegs) { + LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); + notifyEvent(HWInstructionDispatchedEvent(IR, UsedRegs)); +} + +bool DispatchStage::checkPRF(const InstRef &IR) const { + SmallVector RegDefs; + for (const std::unique_ptr &RegDef : + IR.getInstruction()->getDefs()) + RegDefs.emplace_back(RegDef->getRegisterID()); + + const unsigned RegisterMask = PRF.isAvailable(RegDefs); + // A mask with all zeroes means: register files are available. + if (RegisterMask) { + notifyEvent( + HWStallEvent(HWStallEvent::RegisterFileStall, IR)); + return false; + } + + return true; +} + +bool DispatchStage::checkRCU(const InstRef &IR) const { + const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + if (RCU.isAvailable(NumMicroOps)) + return true; + notifyEvent( + HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); + return false; +} + +bool DispatchStage::canDispatch(const InstRef &IR) const { + return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); +} + +void DispatchStage::updateRAWDependencies(ReadState &RS, + const MCSubtargetInfo &STI) { + SmallVector DependentWrites; + + collectWrites(DependentWrites, RS.getRegisterID()); + RS.setDependentWrites(DependentWrites.size()); + // We know that this read depends on all the writes in DependentWrites. 
+ // For each write, check if we have ReadAdvance information, and use it + // to figure out in how many cycles this read becomes available. + const ReadDescriptor &RD = RS.getDescriptor(); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); + for (WriteRef &WR : DependentWrites) { + WriteState &WS = *WR.getWriteState(); + unsigned WriteResID = WS.getWriteResourceID(); + int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); + WS.addUser(&RS, ReadAdvance); + } +} + +llvm::Error DispatchStage::dispatch(InstRef IR) { + assert(!CarryOver && "Cannot dispatch another instruction!"); + Instruction &IS = *IR.getInstruction(); + const InstrDesc &Desc = IS.getDesc(); + const unsigned NumMicroOps = Desc.NumMicroOps; + if (NumMicroOps > DispatchWidth) { + assert(AvailableEntries == DispatchWidth); + AvailableEntries = 0; + CarryOver = NumMicroOps - DispatchWidth; + } else { + assert(AvailableEntries >= NumMicroOps); + AvailableEntries -= NumMicroOps; + } + + // A dependency-breaking instruction doesn't have to wait on the register + // input operands, and it is often optimized at register renaming stage. + // Update RAW dependencies if this instruction is not a dependency-breaking + // instruction. A dependency-breaking instruction is a zero-latency + // instruction that doesn't consume hardware resources. + // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. + bool IsDependencyBreaking = IS.isDependencyBreaking(); + for (std::unique_ptr &RS : IS.getUses()) + if (RS->isImplicitRead() || !IsDependencyBreaking) + updateRAWDependencies(*RS, STI); + + // By default, a dependency-breaking zero-latency instruction is expected to + // be optimized at register renaming stage. That means, no physical register + // is allocated to the instruction. 
+ bool ShouldAllocateRegisters = + !(Desc.isZeroLatency() && IsDependencyBreaking); + SmallVector RegisterFiles(PRF.getNumRegisterFiles()); + for (std::unique_ptr &WS : IS.getDefs()) { + PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles, + ShouldAllocateRegisters); + } + + // Reserve slots in the RCU, and notify the instruction that it has been + // dispatched to the schedulers for execution. + IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + + // Notify listeners of the "instruction dispatched" event, + // and move IR to the next stage. + notifyInstructionDispatched(IR, RegisterFiles); + return moveToTheNextStage(IR); +} + +llvm::Error DispatchStage::cycleStart() { + AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; + CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; + return llvm::ErrorSuccess(); +} + +bool DispatchStage::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + if (Required > AvailableEntries) + return false; + // The dispatch logic doesn't internally buffer instructions. It only accepts + // instructions that can be successfully moved to the next stage during this + // same cycle. 
+ return canDispatch(IR); +} + +llvm::Error DispatchStage::execute(InstRef &IR) { + assert(canDispatch(IR) && "Cannot dispatch another instruction!"); + return dispatch(IR); +} + +#ifndef NDEBUG +void DispatchStage::dump() const { + PRF.dump(); + RCU.dump(); +} +#endif +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp @@ -0,0 +1,195 @@ +//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of an instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. 
+/// +//===----------------------------------------------------------------------===// + +#include "Stages/ExecuteStage.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +using namespace llvm; + +HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { + switch (Status) { + case Scheduler::SC_LOAD_QUEUE_FULL: + return HWStallEvent::LoadQueueFull; + case Scheduler::SC_STORE_QUEUE_FULL: + return HWStallEvent::StoreQueueFull; + case Scheduler::SC_BUFFERS_FULL: + return HWStallEvent::SchedulerQueueFull; + case Scheduler::SC_DISPATCH_GROUP_STALL: + return HWStallEvent::DispatchGroupStall; + case Scheduler::SC_AVAILABLE: + return HWStallEvent::Invalid; + } + + llvm_unreachable("Don't know how to process this StallKind!"); +} + +bool ExecuteStage::isAvailable(const InstRef &IR) const { + if (Scheduler::Status S = HWS.isAvailable(IR)) { + HWStallEvent::GenericEventType ET = toHWStallEventType(S); + notifyEvent(HWStallEvent(ET, IR)); + return false; + } + + return true; +} + +Error ExecuteStage::issueInstruction(InstRef &IR) { + SmallVector, 4> Used; + SmallVector Ready; + HWS.issueInstruction(IR, Used, Ready); + + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + notifyReleasedBuffers(Desc.Buffers); + notifyInstructionIssued(IR, Used); + if (IR.getInstruction()->isExecuted()) { + notifyInstructionExecuted(IR); + //FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &I : Ready) + notifyInstructionReady(I); + return ErrorSuccess(); +} + +Error ExecuteStage::issueReadyInstructions() { + InstRef IR = HWS.select(); + while (IR.isValid()) { + if (Error Err = issueInstruction(IR)) + return Err; + + // Select the next instruction to issue. 
+ IR = HWS.select(); + } + + return ErrorSuccess(); +} + +Error ExecuteStage::cycleStart() { + llvm::SmallVector Freed; + llvm::SmallVector Executed; + llvm::SmallVector Ready; + + HWS.cycleEvent(Freed, Executed, Ready); + + for (const ResourceRef &RR : Freed) + notifyResourceAvailable(RR); + + for (InstRef &IR : Executed) { + notifyInstructionExecuted(IR); + //FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &IR : Ready) + notifyInstructionReady(IR); + + return issueReadyInstructions(); +} + +// Schedule the instruction for execution on the hardware. +Error ExecuteStage::execute(InstRef &IR) { + assert(isAvailable(IR) && "Scheduler is not available!"); + +#ifndef NDEBUG + // Ensure that the HWS has not stored this instruction in its queues. + HWS.sanityCheck(IR); +#endif + // Reserve a slot in each buffered resource. Also, mark units with + // BufferSize=0 as reserved. Resources with a buffer size of zero will only + // be released after MCIS is issued, and all the ResourceCycles for those + // units have been consumed. + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + HWS.dispatch(IR); + notifyReservedBuffers(Desc.Buffers); + if (!HWS.isReady(IR)) + return ErrorSuccess(); + + // If we did not return early, then the scheduler is ready for execution. + notifyInstructionReady(IR); + + // If we cannot issue immediately, the HWS will add IR to its ready queue for + // execution later, so we must return early here. + if (!HWS.mustIssueImmediately(IR)) + return ErrorSuccess(); + + // Issue IR to the underlying pipelines. 
+ return issueInstruction(IR); +} + +void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) { + LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); + notifyEvent( + HWInstructionEvent(HWInstructionEvent::Executed, IR)); +} + +void ExecuteStage::notifyInstructionReady(const InstRef &IR) { + LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); + notifyEvent( + HWInstructionEvent(HWInstructionEvent::Ready, IR)); +} + +void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) { + LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' + << RR.second << "]\n"); + for (HWEventListener *Listener : getListeners()) + Listener->onResourceAvailable(RR); +} + +void ExecuteStage::notifyInstructionIssued( + const InstRef &IR, ArrayRef> Used) { + LLVM_DEBUG({ + dbgs() << "[E] Instruction Issued: #" << IR << '\n'; + for (const std::pair &Resource : Used) { + dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' + << Resource.first.second << "], "; + dbgs() << "cycles: " << Resource.second << '\n'; + } + }); + notifyEvent(HWInstructionIssuedEvent(IR, Used)); +} + +void ExecuteStage::notifyReservedBuffers(ArrayRef Buffers) { + if (Buffers.empty()) + return; + + SmallVector BufferIDs(Buffers.begin(), Buffers.end()); + std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), + [&](uint64_t Op) { return HWS.getResourceID(Op); }); + for (HWEventListener *Listener : getListeners()) + Listener->onReservedBuffers(BufferIDs); +} + +void ExecuteStage::notifyReleasedBuffers(ArrayRef Buffers) { + if (Buffers.empty()) + return; + + SmallVector BufferIDs(Buffers.begin(), Buffers.end()); + std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), + [&](uint64_t Op) { return HWS.getResourceID(Op); }); + for (HWEventListener *Listener : getListeners()) + Listener->onReleasedBuffers(BufferIDs); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Stages/FetchStage.cpp 
=================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/FetchStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/FetchStage.cpp @@ -0,0 +1,82 @@ +//===---------------------- FetchStage.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Fetch stage of an instruction pipeline. Its sole +/// purpose in life is to produce instructions for the rest of the pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "Stages/FetchStage.h" + +namespace mca { + +bool FetchStage::hasWorkToComplete() const { + return CurrentInstruction.get() || SM.hasNext(); +} + +bool FetchStage::isAvailable(const InstRef & /* unused */) const { + if (!CurrentInstruction) + return false; + assert(SM.hasNext() && "Unexpected internal state!"); + const SourceRef SR = SM.peekNext(); + InstRef IR(SR.first, CurrentInstruction.get()); + return checkNextStage(IR); +} + +llvm::Error FetchStage::getNextInstruction() { + assert(!CurrentInstruction && "There is already an instruction to process!"); + if (!SM.hasNext()) + return llvm::ErrorSuccess(); + const SourceRef SR = SM.peekNext(); + llvm::Expected> InstOrErr = + IB.createInstruction(*SR.second); + if (!InstOrErr) + return InstOrErr.takeError(); + CurrentInstruction = std::move(InstOrErr.get()); + return llvm::ErrorSuccess(); +} + +llvm::Error FetchStage::execute(InstRef & /*unused */) { + assert(CurrentInstruction && "There is no instruction to process!"); + const SourceRef SR = SM.peekNext(); + InstRef IR(SR.first, CurrentInstruction.get()); + assert(checkNextStage(IR) && "Invalid fetch!"); + + Instructions[IR.getSourceIndex()] = std::move(CurrentInstruction); + if 
(llvm::Error Val = moveToTheNextStage(IR)) + return Val; + + SM.updateNext(); + + // Move the program counter. + return getNextInstruction(); +} + +llvm::Error FetchStage::cycleStart() { + if (!CurrentInstruction && SM.hasNext()) + return getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error FetchStage::cycleEnd() { + // Find the first instruction which hasn't been retired. + const InstMap::iterator It = + llvm::find_if(Instructions, [](const InstMap::value_type &KeyValuePair) { + return !KeyValuePair.second->isRetired(); + }); + + // Erase instructions up to the first that hasn't been retired. + if (It != Instructions.begin()) + Instructions.erase(Instructions.begin(), It); + + return llvm::ErrorSuccess(); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp @@ -0,0 +1,70 @@ +//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the method InstructionTables::execute(). +/// Method execute() prints a theoretical resource pressure distribution based +/// on the information available in the scheduling model, and without running +/// the pipeline. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "Stages/InstructionTables.h"
+
+namespace mca {
+
+using namespace llvm;
+
+Error InstructionTables::execute(InstRef &IR) {
+  ArrayRef<uint64_t> Masks = IB.getProcResourceMasks();
+  const InstrDesc &Desc = IR.getInstruction()->getDesc();
+  UsedResources.clear();
+
+  // Identify the resources consumed by this instruction.
+  for (const std::pair<uint64_t, ResourceUsage> Resource : Desc.Resources) {
+    // Skip zero-cycle resources (i.e., unused resources).
+    if (!Resource.second.size())
+      continue;
+    double Cycles = static_cast<double>(Resource.second.size());
+    unsigned Index = std::distance(
+        Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
+    const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
+    unsigned NumUnits = ProcResource.NumUnits;
+    if (!ProcResource.SubUnitsIdxBegin) {
+      // The number of cycles consumed by each unit.
+      Cycles /= NumUnits;
+      for (unsigned I = 0, E = NumUnits; I < E; ++I) {
+        ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
+        UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles));
+      }
+      continue;
+    }
+
+    // This is a group. Obtain the set of resources contained in this
+    // group. Some of these resources may implement multiple units.
+    // Uniformly distribute Cycles across all of the units.
+    for (unsigned I1 = 0; I1 < NumUnits; ++I1) {
+      unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1];
+      const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx);
+      // Compute the number of cycles consumed by each resource unit.
+      double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits);
+      for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
+        ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
+        UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles));
+      }
+    }
+  }
+
+  // Send a fake instruction issued event to all the views.
+  HWInstructionIssuedEvent Event(IR, UsedResources);
+  notifyEvent(Event);
+  return ErrorSuccess();
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp
+++ llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp
@@ -0,0 +1,62 @@
+//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the retire stage of an instruction pipeline.
+/// The RetireStage represents the process logic that interacts with the
+/// simulated RetireControlUnit hardware.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Stages/RetireStage.h"
+#include "HWEventListener.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace mca {
+
+llvm::Error RetireStage::cycleStart() {
+  if (RCU.isEmpty())
+    return llvm::ErrorSuccess();
+
+  const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
+  unsigned NumRetired = 0;
+  while (!RCU.isEmpty()) {
+    if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle)
+      break;
+    const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken();
+    if (!Current.Executed)
+      break;
+    RCU.consumeCurrentToken();
+    notifyInstructionRetired(Current.IR);
+    NumRetired++;
+  }
+
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error RetireStage::execute(InstRef &IR) {
+  RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+  return llvm::ErrorSuccess();
+}
+
+void RetireStage::notifyInstructionRetired(const InstRef &IR) {
+  LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n');
+  llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
+  const Instruction &Inst = *IR.getInstruction();
+  const InstrDesc &Desc = Inst.getDesc();
+
+  bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking());
+  for (const std::unique_ptr<WriteState> &WS : Inst.getDefs())
+    PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs);
+  notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs));
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp
+++ llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp
@@ -0,0 +1,27 @@
+//===---------------------- Stage.cpp ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a stage.
+/// A chain of stages compose an instruction pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Stages/Stage.h"
+
+namespace mca {
+
+// Pin the vtable here in the implementation file.
+Stage::~Stage() = default;
+
+void Stage::addListener(HWEventListener *Listener) {
+  Listeners.insert(Listener);
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/lib/Support.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/lib/Support.cpp
+++ llvm/trunk/tools/llvm-mca/lib/Support.cpp
@@ -0,0 +1,79 @@
+//===--------------------- Support.cpp --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a few helper functions used by various pipeline
+/// components.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Support.h"
+#include "llvm/MC/MCSchedule.h"
+
+namespace mca {
+
+using namespace llvm;
+
+void computeProcResourceMasks(const MCSchedModel &SM,
+                              SmallVectorImpl<uint64_t> &Masks) {
+  unsigned ProcResourceID = 0;
+
+  // Create a unique bitmask for every processor resource unit.
+  // Skip resource at index 0, since it always references 'InvalidUnit'.
+  Masks.resize(SM.getNumProcResourceKinds());
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    ProcResourceID++;
+  }
+
+  // Create a unique bitmask for every processor resource group.
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (!Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    for (unsigned U = 0; U < Desc.NumUnits; ++U) {
+      uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]];
+      Masks[I] |= OtherMask;
+    }
+    ProcResourceID++;
+  }
+}
+
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+                               unsigned NumMicroOps,
+                               ArrayRef<unsigned> ProcResourceUsage) {
+  // The block throughput is bounded from above by the hardware dispatch
+  // throughput. That is because the DispatchWidth is an upper bound on the
+  // number of opcodes that can be part of a single dispatch group.
+  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+  // The block throughput is also limited by the amount of hardware parallelism.
+  // The number of available resource units affects the resource pressure
+  // distribution, as well as how many blocks can be executed every cycle.
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    unsigned ResourceCycles = ProcResourceUsage[I];
+    if (!ResourceCycles)
+      continue;
+
+    const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+    double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
+    Max = std::max(Max, Throughput);
+  }
+
+  // The block reciprocal throughput is computed as the MAX of:
+  //  - (NumMicroOps / DispatchWidth)
+  //  - (ResourceCycles / NumUnits) for every consumed processor resource.
+  return Max;
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/llvm-mca.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/llvm-mca.cpp
+++ llvm/trunk/tools/llvm-mca/llvm-mca.cpp
@@ -22,11 +22,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeRegion.h"
-#include "Context.h"
-#include "FetchStage.h"
-#include "InstructionTables.h"
-#include "Pipeline.h"
 #include "PipelinePrinter.h"
+#include "Stages/FetchStage.h"
+#include "Stages/InstructionTables.h"
 #include "Views/DispatchStatistics.h"
 #include "Views/InstructionInfoView.h"
 #include "Views/RegisterFileStatistics.h"
@@ -35,6 +33,8 @@
 #include "Views/SchedulerStatistics.h"
 #include "Views/SummaryView.h"
 #include "Views/TimelineView.h"
+#include "include/Context.h"
+#include "include/Pipeline.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectFileInfo.h"