Index: llvm/trunk/CODE_OWNERS.TXT =================================================================== --- llvm/trunk/CODE_OWNERS.TXT +++ llvm/trunk/CODE_OWNERS.TXT @@ -71,7 +71,7 @@ N: Andrea Di Biagio E: andrea.dibiagio@sony.com E: andrea.dibiagio@gmail.com -D: llvm-mca +D: MCA, llvm-mca N: Duncan P. N. Exon Smith E: dexonsmith@apple.com Index: llvm/trunk/include/llvm/MCA/Context.h =================================================================== --- llvm/trunk/include/llvm/MCA/Context.h +++ llvm/trunk/include/llvm/MCA/Context.h @@ -0,0 +1,69 @@ +//===---------------------------- Context.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_CONTEXT_H +#define LLVM_MCA_CONTEXT_H + +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" +#include "llvm/MCA/InstrBuilder.h" +#include "llvm/MCA/Pipeline.h" +#include "llvm/MCA/SourceMgr.h" +#include + +namespace llvm { +namespace mca { + +/// This is a convenience struct to hold the parameters necessary for creating +/// the pre-built "default" out-of-order pipeline. 
+struct PipelineOptions { + PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS, + bool NoAlias) + : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS), + StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} + unsigned DispatchWidth; + unsigned RegisterFileSize; + unsigned LoadQueueSize; + unsigned StoreQueueSize; + bool AssumeNoAlias; +}; + +class Context { + SmallVector, 4> Hardware; + const MCRegisterInfo &MRI; + const MCSubtargetInfo &STI; + +public: + Context(const MCRegisterInfo &R, const MCSubtargetInfo &S) : MRI(R), STI(S) {} + Context(const Context &C) = delete; + Context &operator=(const Context &C) = delete; + + void addHardwareUnit(std::unique_ptr H) { + Hardware.push_back(std::move(H)); + } + + /// Construct a basic pipeline for simulating an out-of-order pipeline. + /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. + std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, + InstrBuilder &IB, + SourceMgr &SrcMgr); +}; + +} // namespace mca +} // namespace llvm +#endif // LLVM_MCA_CONTEXT_H Index: llvm/trunk/include/llvm/MCA/HWEventListener.h =================================================================== --- llvm/trunk/include/llvm/MCA/HWEventListener.h +++ llvm/trunk/include/llvm/MCA/HWEventListener.h @@ -0,0 +1,156 @@ +//===----------------------- HWEventListener.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the main interface for hardware event listeners. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_HWEVENTLISTENER_H +#define LLVM_MCA_HWEVENTLISTENER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Support.h" + +namespace llvm { +namespace mca { + +// An HWInstructionEvent represents state changes of instructions that +// listeners might be interested in. Listeners can choose to ignore any event +// they are not interested in. +class HWInstructionEvent { +public: + // This is the list of event types that are shared by all targets, that + // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent, + // ...) and generic Views can manipulate. + // Subtargets are free to define additional event types, that are goin to be + // handled by generic components as opaque values, but can still be + // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage, + // DispatchStage, ...) and interpreted by subtarget-specific EventListener + // implementations. + enum GenericEventType { + Invalid = 0, + // Events generated by the Retire Control Unit. + Retired, + // Events generated by the Scheduler. + Ready, + Issued, + Executed, + // Events generated by the Dispatch logic. + Dispatched, + + LastGenericEventType, + }; + + HWInstructionEvent(unsigned type, const InstRef &Inst) + : Type(type), IR(Inst) {} + + // The event type. The exact meaning depends on the subtarget. + const unsigned Type; + + // The instruction this event was generated for. 
+ const InstRef &IR; +}; + +class HWInstructionIssuedEvent : public HWInstructionEvent { +public: + using ResourceRef = std::pair; + HWInstructionIssuedEvent(const InstRef &IR, + ArrayRef> UR) + : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {} + + ArrayRef> UsedResources; +}; + +class HWInstructionDispatchedEvent : public HWInstructionEvent { +public: + HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef Regs, + unsigned UOps) + : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), + UsedPhysRegs(Regs), MicroOpcodes(UOps) {} + // Number of physical register allocated for this instruction. There is one + // entry per register file. + ArrayRef UsedPhysRegs; + // Number of micro opcodes dispatched. + // This field is often set to the total number of micro-opcodes specified by + // the instruction descriptor of IR. + // The only exception is when IR declares a number of micro opcodes + // which exceeds the processor DispatchWidth, and - by construction - it + // requires multiple cycles to be fully dispatched. In that particular case, + // the dispatch logic would generate more than one dispatch event (one per + // cycle), and each event would declare how many micro opcodes are effectively + // been dispatched to the schedulers. + unsigned MicroOpcodes; +}; + +class HWInstructionRetiredEvent : public HWInstructionEvent { +public: + HWInstructionRetiredEvent(const InstRef &IR, ArrayRef Regs) + : HWInstructionEvent(HWInstructionEvent::Retired, IR), + FreedPhysRegs(Regs) {} + // Number of register writes that have been architecturally committed. There + // is one entry per register file. + ArrayRef FreedPhysRegs; +}; + +// A HWStallEvent represents a pipeline stall caused by the lack of hardware +// resources. +class HWStallEvent { +public: + enum GenericEventType { + Invalid = 0, + // Generic stall events generated by the DispatchStage. + RegisterFileStall, + RetireControlUnitStall, + // Generic stall events generated by the Scheduler. 
+ DispatchGroupStall, + SchedulerQueueFull, + LoadQueueFull, + StoreQueueFull, + LastGenericEvent + }; + + HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {} + + // The exact meaning of the stall event type depends on the subtarget. + const unsigned Type; + + // The instruction this event was generated for. + const InstRef &IR; +}; + +class HWEventListener { +public: + // Generic events generated by the pipeline. + virtual void onCycleBegin() {} + virtual void onCycleEnd() {} + + virtual void onEvent(const HWInstructionEvent &Event) {} + virtual void onEvent(const HWStallEvent &Event) {} + + using ResourceRef = std::pair; + virtual void onResourceAvailable(const ResourceRef &RRef) {} + + // Events generated by the Scheduler when buffered resources are + // consumed/freed for an instruction. + virtual void onReservedBuffers(const InstRef &Inst, + ArrayRef Buffers) {} + virtual void onReleasedBuffers(const InstRef &Inst, + ArrayRef Buffers) {} + + virtual ~HWEventListener() {} + +private: + virtual void anchor(); +}; +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_HWEVENTLISTENER_H Index: llvm/trunk/include/llvm/MCA/HardwareUnits/HardwareUnit.h =================================================================== --- llvm/trunk/include/llvm/MCA/HardwareUnits/HardwareUnit.h +++ llvm/trunk/include/llvm/MCA/HardwareUnits/HardwareUnit.h @@ -0,0 +1,33 @@ +//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a base class for describing a simulated hardware +/// unit. These units are used to construct a simulated backend. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_HARDWAREUNIT_H +#define LLVM_MCA_HARDWAREUNIT_H + +namespace llvm { +namespace mca { + +class HardwareUnit { + HardwareUnit(const HardwareUnit &H) = delete; + HardwareUnit &operator=(const HardwareUnit &H) = delete; + +public: + HardwareUnit() = default; + virtual ~HardwareUnit(); +}; + +} // namespace mca +} // namespace llvm +#endif // LLVM_MCA_HARDWAREUNIT_H Index: llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h =================================================================== --- llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h +++ llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -0,0 +1,207 @@ +//===------------------------- LSUnit.h --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load/Store unit class that models load/store queues and that implements +/// a simple weak memory consistency model. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_LSUNIT_H +#define LLVM_MCA_LSUNIT_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" + +namespace llvm { +namespace mca { + +class InstRef; +class Scheduler; + +/// A Load/Store Unit implementing a load and store queues. +/// +/// This class implements a load queue and a store queue to emulate the +/// out-of-order execution of memory operations. +/// Each load (or store) consumes an entry in the load (or store) queue. +/// +/// Rules are: +/// 1) A younger load is allowed to pass an older load only if there are no +/// stores nor barriers in between the two loads. 
+/// 2) An younger store is not allowed to pass an older store. +/// 3) A younger store is not allowed to pass an older load. +/// 4) A younger load is allowed to pass an older store only if the load does +/// not alias with the store. +/// +/// This class optimistically assumes that loads don't alias store operations. +/// Under this assumption, younger loads are always allowed to pass older +/// stores (this would only affects rule 4). +/// Essentially, this class doesn't perform any sort alias analysis to +/// identify aliasing loads and stores. +/// +/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be +/// set to `false` by the constructor of LSUnit. +/// +/// Note that this class doesn't know about the existence of different memory +/// types for memory operations (example: write-through, write-combining, etc.). +/// Derived classes are responsible for implementing that extra knowledge, and +/// provide different sets of rules for loads and stores by overriding method +/// `isReady()`. +/// To emulate a write-combining memory type, rule 2. must be relaxed in a +/// derived class to enable the reordering of non-aliasing store operations. +/// +/// No assumptions are made by this class on the size of the store buffer. This +/// class doesn't know how to identify cases where store-to-load forwarding may +/// occur. +/// +/// LSUnit doesn't attempt to predict whether a load or store hits or misses +/// the L1 cache. To be more specific, LSUnit doesn't know anything about +/// cache hierarchy and memory types. +/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the +/// scheduling model provides an "optimistic" load-to-use latency (which usually +/// matches the load-to-use latency for when there is a hit in the L1D). +/// Derived classes may expand this knowledge. +/// +/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor +/// memory-barrier like instructions. 
+/// LSUnit conservatively assumes that an instruction which `mayLoad` and has +/// `unmodeled side effects` behave like a "soft" load-barrier. That means, it +/// serializes loads without forcing a flush of the load queue. +/// Similarly, instructions that both `mayStore` and have `unmodeled side +/// effects` are treated like store barriers. A full memory +/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side +/// effects. This is obviously inaccurate, but this is the best that we can do +/// at the moment. +/// +/// Each load/store barrier consumes one entry in the load/store queue. A +/// load/store barrier enforces ordering of loads/stores: +/// - A younger load cannot pass a load barrier. +/// - A younger store cannot pass a store barrier. +/// +/// A younger load has to wait for the memory load barrier to execute. +/// A load/store barrier is "executed" when it becomes the oldest entry in +/// the load/store queue(s). That also means, all the older loads/stores have +/// already been executed. +class LSUnit : public HardwareUnit { + // Load queue size. + // LQ_Size == 0 means that there are infinite slots in the load queue. + unsigned LQ_Size; + + // Store queue size. + // SQ_Size == 0 means that there are infinite slots in the store queue. + unsigned SQ_Size; + + // If true, loads will never alias with stores. This is the default. + bool NoAlias; + + // When a `MayLoad` instruction is dispatched to the schedulers for execution, + // the LSUnit reserves an entry in the `LoadQueue` for it. + // + // LoadQueue keeps track of all the loads that are in-flight. A load + // instruction is eventually removed from the LoadQueue when it reaches + // completion stage. That means, a load leaves the queue whe it is 'executed', + // and its value can be forwarded on the data path to outside units. + // + // This class doesn't know about the latency of a load instruction. 
So, it + // conservatively/pessimistically assumes that the latency of a load opcode + // matches the instruction latency. + // + // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses), + // and load/store conflicts, the latency of a load is determined by the depth + // of the load pipeline. So, we could use field `LoadLatency` in the + // MCSchedModel to model that latency. + // Field `LoadLatency` often matches the so-called 'load-to-use' latency from + // L1D, and it usually already accounts for any extra latency due to data + // forwarding. + // When doing throughput analysis, `LoadLatency` is likely to + // be a better predictor of load latency than instruction latency. This is + // particularly true when simulating code with temporal/spatial locality of + // memory accesses. + // Using `LoadLatency` (instead of the instruction latency) is also expected + // to improve the load queue allocation for long latency instructions with + // folded memory operands (See PR39829). + // + // FIXME: On some processors, load/store operations are split into multiple + // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but + // not 256-bit data types. So, a 256-bit load is effectively split into two + // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For + // simplicity, this class optimistically assumes that a load instruction only + // consumes one entry in the LoadQueue. Similarly, store instructions only + // consume a single entry in the StoreQueue. + // In future, we should reassess the quality of this design, and consider + // alternative approaches that let instructions specify the number of + // load/store queue entries which they consume at dispatch stage (See + // PR39830). 
+ SmallSet LoadQueue; + SmallSet StoreQueue; + + void assignLQSlot(unsigned Index); + void assignSQSlot(unsigned Index); + bool isReadyNoAlias(unsigned Index) const; + + // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is + // conservatively treated as a store barrier. It forces older store to be + // executed before newer stores are issued. + SmallSet StoreBarriers; + + // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is + // conservatively treated as a load barrier. It forces older loads to execute + // before newer loads are issued. + SmallSet LoadBarriers; + + bool isSQEmpty() const { return StoreQueue.empty(); } + bool isLQEmpty() const { return LoadQueue.empty(); } + bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; } + bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; } + +public: + LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0, + bool AssumeNoAlias = false); + +#ifndef NDEBUG + void dump() const; +#endif + + enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL }; + + // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve + // IR. It also returns LSU_AVAILABLE if IR is not a memory operation. + Status isAvailable(const InstRef &IR) const; + + // Allocates load/store queue resources for IR. + // + // This method assumes that a previous call to `isAvailable(IR)` returned + // LSU_AVAILABLE, and that IR is a memory operation. + void dispatch(const InstRef &IR); + + // By default, rules are: + // 1. A store may not pass a previous store. + // 2. A load may not pass a previous store unless flag 'NoAlias' is set. + // 3. A load may pass a previous load. + // 4. A store may not pass a previous load (regardless of flag 'NoAlias'). + // 5. A load has to wait until an older load barrier is fully executed. + // 6. A store has to wait until an older store barrier is fully executed. 
+ virtual bool isReady(const InstRef &IR) const; + + // Load and store instructions are tracked by their corresponding queues from + // dispatch until the "instruction executed" event. + // Only when a load instruction reaches the 'Executed' stage, its value + // becomes available to the users. At that point, the load no longer needs to + // be tracked by the load queue. + // FIXME: For simplicity, we optimistically assume a similar behavior for + // store instructions. In practice, store operations don't tend to leave the + // store queue until they reach the 'Retired' stage (See PR39830). + void onInstructionExecuted(const InstRef &IR); +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_LSUNIT_H Index: llvm/trunk/include/llvm/MCA/HardwareUnits/RegisterFile.h =================================================================== --- llvm/trunk/include/llvm/MCA/HardwareUnits/RegisterFile.h +++ llvm/trunk/include/llvm/MCA/HardwareUnits/RegisterFile.h @@ -0,0 +1,239 @@ +//===--------------------- RegisterFile.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_REGISTER_FILE_H +#define LLVM_MCA_REGISTER_FILE_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace mca { + +class ReadState; +class WriteState; +class WriteRef; + +/// Manages hardware register files, and tracks register definitions for +/// register renaming purposes. +class RegisterFile : public HardwareUnit { + const MCRegisterInfo &MRI; + + // class RegisterMappingTracker is a physical register file (PRF) descriptor. + // There is one RegisterMappingTracker for every PRF definition in the + // scheduling model. + // + // An instance of RegisterMappingTracker tracks the number of physical + // registers available for renaming. It also tracks the number of register + // moves eliminated per cycle. + struct RegisterMappingTracker { + // The total number of physical registers that are available in this + // register file for register renaming purpouses. A value of zero for this + // field means: this register file has an unbounded number of physical + // registers. + const unsigned NumPhysRegs; + // Number of physical registers that are currently in use. + unsigned NumUsedPhysRegs; + + // Maximum number of register moves that can be eliminated by this PRF every + // cycle. A value of zero means that there is no limit in the number of + // moves which can be eliminated every cycle. + const unsigned MaxMoveEliminatedPerCycle; + + // Number of register moves eliminated during this cycle. + // + // This value is increased by one every time a register move is eliminated. + // Every new cycle, this value is reset to zero. + // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if + // NumMoveEliminated is less than MaxMoveEliminatedPerCycle. 
+ unsigned NumMoveEliminated; + + // If set, move elimination is restricted to zero-register moves only. + bool AllowZeroMoveEliminationOnly; + + RegisterMappingTracker(unsigned NumPhysRegisters, + unsigned MaxMoveEliminated = 0U, + bool AllowZeroMoveElimOnly = false) + : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0), + MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U), + AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly) {} + }; + + // A vector of register file descriptors. This set always contains at least + // one entry. Entry at index #0 is reserved. That entry describes a register + // file with an unbounded number of physical registers that "sees" all the + // hardware registers declared by the target (i.e. all the register + // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is + // the target name). + // + // Users can limit the number of physical registers that are available in + // regsiter file #0 specifying command line flag `-register-file-size=`. + SmallVector RegisterFiles; + + // This type is used to propagate information about the owner of a register, + // and the cost of allocating it in the PRF. Register cost is defined as the + // number of physical registers consumed by the PRF to allocate a user + // register. + // + // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical + // registers. So, the cost of allocating a YMM register in BtVer2 is 2. + using IndexPlusCostPairTy = std::pair; + + // Struct RegisterRenamingInfo is used to map logical registers to register + // files. + // + // There is a RegisterRenamingInfo object for every logical register defined + // by the target. RegisteRenamingInfo objects are stored into vector + // `RegisterMappings`, and MCPhysReg IDs can be used to reference + // elements in that vector. 
+ // + // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost` + // specifies both the owning PRF, as well as the number of physical registers + // consumed at register renaming stage. + // + // Field `AllowMoveElimination` is set for registers that are used as + // destination by optimizable register moves. + // + // Field `AliasRegID` is set by writes from register moves that have been + // eliminated at register renaming stage. A move eliminated at register + // renaming stage is effectively bypassed, and its write aliases the source + // register definition. + struct RegisterRenamingInfo { + IndexPlusCostPairTy IndexPlusCost; + MCPhysReg RenameAs; + MCPhysReg AliasRegID; + bool AllowMoveElimination; + RegisterRenamingInfo() + : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U), AliasRegID(0U), + AllowMoveElimination(false) {} + }; + + // RegisterMapping objects are mainly used to track physical register + // definitions and resolve data dependencies. + // + // Every register declared by the Target is associated with an instance of + // RegisterMapping. RegisterMapping objects keep track of writes to a logical + // register. That information is used by class RegisterFile to resolve data + // dependencies, and correctly set latencies for register uses. + // + // This implementation does not allow overlapping register files. The only + // register file that is allowed to overlap with other register files is + // register file #0. If we exclude register #0, every register is "owned" by + // at most one register file. + using RegisterMapping = std::pair; + + // There is one entry per each register defined by the target. + std::vector RegisterMappings; + + // Used to track zero registers. There is one bit for each register defined by + // the target. Bits are set for registers that are known to be zero. + APInt ZeroRegisters; + + // This method creates a new register file descriptor. 
+ // The new register file owns all of the registers declared by register + // classes in the 'RegisterClasses' set. + // + // Processor models allow the definition of RegisterFile(s) via tablegen. For + // example, this is a tablegen definition for a x86 register file for + // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1 + // physical register). + // + // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]> + // + // Here FPRegisterFile contains all the registers defined by register class + // VR128RegClass and VR256RegClass. FPRegisterFile implements 60 + // registers which can be used for register renaming purpose. + void addRegisterFile(const MCRegisterFileDesc &RF, + ArrayRef Entries); + + // Consumes physical registers in each register file specified by the + // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`. + void allocatePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef UsedPhysRegs); + + // Releases previously allocated physical registers from the register file(s). + // This method is called from `invalidateRegisterMapping()`. + void freePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef FreedPhysRegs); + + // Collects writes that are in a RAW dependency with RS. + // This method is called from `addRegisterRead()`. + void collectWrites(const ReadState &RS, + SmallVectorImpl &Writes) const; + + // Create an instance of RegisterMappingTracker for every register file + // specified by the processor model. + // If no register file is specified, then this method creates a default + // register file with an unbounded number of physical registers. + void initialize(const MCSchedModel &SM, unsigned NumRegs); + +public: + RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri, + unsigned NumRegs = 0); + + // This method updates the register mappings inserting a new register + // definition. 
This method is also responsible for updating the number of + // allocated physical registers in each register file modified by the write. + // No physical regiser is allocated if this write is from a zero-idiom. + void addRegisterWrite(WriteRef Write, MutableArrayRef UsedPhysRegs); + + // Collect writes that are in a data dependency with RS, and update RS + // internal state. + void addRegisterRead(ReadState &RS, SmallVectorImpl &Writes) const; + + // Removes write \param WS from the register mappings. + // Physical registers may be released to reflect this update. + // No registers are released if this write is from a zero-idiom. + void removeRegisterWrite(const WriteState &WS, + MutableArrayRef FreedPhysRegs); + + // Returns true if a move from RS to WS can be eliminated. + // On success, it updates WriteState by setting flag `WS.isEliminated`. + // If RS is a read from a zero register, and WS is eliminated, then + // `WS.WritesZero` is also set, so that method addRegisterWrite() would not + // reserve a physical register for it. + bool tryEliminateMove(WriteState &WS, ReadState &RS); + + // Checks if there are enough physical registers in the register files. + // Returns a "response mask" where each bit represents the response from a + // different register file. A mask of all zeroes means that all register + // files are available. Otherwise, the mask can be used to identify which + // register file was busy. This sematic allows us to classify dispatch + // stalls caused by the lack of register file resources. + // + // Current implementation can simulate up to 32 register files (including the + // special register file at index #0). + unsigned isAvailable(ArrayRef Regs) const; + + // Returns the number of PRFs implemented by this processor. + unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } + + // Notify each PRF that a new cycle just started. 
+ void cycleStart(); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_REGISTER_FILE_H Index: llvm/trunk/include/llvm/MCA/HardwareUnits/ResourceManager.h =================================================================== --- llvm/trunk/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ llvm/trunk/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -0,0 +1,360 @@ +//===--------------------- ResourceManager.h --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_RESOURCE_MANAGER_H +#define LLVM_MCA_RESOURCE_MANAGER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Support.h" + +namespace llvm { +namespace mca { + +/// Used to notify the internal state of a processor resource. +/// +/// A processor resource is available if it is not reserved, and there are +/// available slots in the buffer. A processor resource is unavailable if it +/// is either reserved, or the associated buffer is full. A processor resource +/// with a buffer size of -1 is always available if it is not reserved. +/// +/// Values of type ResourceStateEvent are returned by method +/// ResourceState::isBufferAvailable(), which is used to query the internal +/// state of a resource. 
+/// +/// The naming convention for resource state events is: +/// * Event names start with prefix RS_ +/// * Prefix RS_ is followed by a string describing the actual resource state. +enum ResourceStateEvent { + RS_BUFFER_AVAILABLE, + RS_BUFFER_UNAVAILABLE, + RS_RESERVED +}; + +/// Resource allocation strategy used by hardware scheduler resources. +class ResourceStrategy { + ResourceStrategy(const ResourceStrategy &) = delete; + ResourceStrategy &operator=(const ResourceStrategy &) = delete; + +public: + ResourceStrategy() {} + virtual ~ResourceStrategy(); + + /// Selects a processor resource unit from a ReadyMask. + virtual uint64_t select(uint64_t ReadyMask) = 0; + + /// Called by the ResourceManager when a processor resource group, or a + /// processor resource with multiple units has become unavailable. + /// + /// The default strategy uses this information to bias its selection logic. + virtual void used(uint64_t ResourceMask) {} +}; + +/// Default resource allocation strategy used by processor resource groups and +/// processor resources with multiple units. +class DefaultResourceStrategy final : public ResourceStrategy { + /// A Mask of resource unit identifiers. + /// + /// There is one bit set for every available resource unit. + /// It defaults to the value of field ResourceSizeMask in ResourceState. + const unsigned ResourceUnitMask; + + /// A simple round-robin selector for processor resource units. + /// Each bit of this mask identifies a sub resource within a group. + /// + /// As an example, lets assume that this is a default policy for a + /// processor resource group composed by the following three units: + /// ResourceA -- 0b001 + /// ResourceB -- 0b010 + /// ResourceC -- 0b100 + /// + /// Field NextInSequenceMask is used to select the next unit from the set of + /// resource units. It defaults to the value of field `ResourceUnitMasks` (in + /// this example, it defaults to mask '0b111'). 
+ ///
+ /// The round-robin selector would firstly select 'ResourceC', then
+ /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the
+ /// corresponding bit in NextInSequenceMask is cleared. For example, if
+ /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
+ /// 0b011.
+ ///
+ /// When NextInSequenceMask becomes zero, it is automatically reset to the
+ /// default value (i.e. ResourceUnitMask).
+ uint64_t NextInSequenceMask;
+
+ /// This field is used to track resource units that are used (i.e. selected)
+ /// by other groups other than the one associated with this strategy object.
+ ///
+ /// In LLVM processor resource groups are allowed to partially (or fully)
+ /// overlap. That means, a same unit may be visible to multiple groups.
+ /// This field keeps track of uses that have originated from outside of
+ /// this group. The idea is to bias the selection strategy, so that resources
+ /// that haven't been used by other groups get prioritized.
+ ///
+ /// The end goal is to (try to) keep the resource distribution as much uniform
+ /// as possible. By construction, this mask only tracks one-level of resource
+ /// usage. Therefore, this strategy is expected to be less accurate when same
+ /// units are used multiple times by other groups within a single round of
+ /// select.
+ ///
+ /// Note: an LRU selector would have a better accuracy at the cost of being
+ /// slightly more expensive (mostly in terms of runtime cost). Methods
+ /// 'select' and 'used', are always in the hot execution path of llvm-mca.
+ /// Therefore, a slow implementation of 'select' would have a negative impact
+ /// on the overall performance of the tool.
+ uint64_t RemovedFromNextInSequence;
+
+public:
+ DefaultResourceStrategy(uint64_t UnitMask)
+ : ResourceStrategy(), ResourceUnitMask(UnitMask),
+ NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {}
+ virtual ~DefaultResourceStrategy() = default;
+
+ uint64_t select(uint64_t ReadyMask) override;
+ void used(uint64_t Mask) override;
+};
+
+/// A processor resource descriptor.
+///
+/// There is an instance of this class for every processor resource defined by
+/// the machine scheduling model.
+/// Objects of class ResourceState dynamically track the usage of processor
+/// resource units.
+class ResourceState {
+ /// An index to the MCProcResourceDesc entry in the processor model.
+ const unsigned ProcResourceDescIndex;
+ /// A resource mask. This is generated by the tool with the help of
+ /// function `mca::createProcResourceMasks' (see Support.h).
+ const uint64_t ResourceMask;
+
+ /// A ProcResource can have multiple units.
+ ///
+ /// For processor resource groups,
+ /// this field defaults to the value of field `ResourceMask`; the number of
+ /// bits set is equal to the cardinality of the group. For normal (i.e.
+ /// non-group) resources, the number of bits set in this mask is equivalent
+ /// to the number of units declared by the processor model (see field
+ /// 'NumUnits' in 'ProcResourceUnits').
+ uint64_t ResourceSizeMask;
+
+ /// A mask of ready units.
+ uint64_t ReadyMask;
+
+ /// Buffered resources will have this field set to a positive number different
+ /// than zero. A buffered resource behaves like a reservation station
+ /// implementing its own buffer for out-of-order execution.
+ ///
+ /// A BufferSize of 1 is used by scheduler resources that force in-order
+ /// execution.
+ ///
+ /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
+ /// Since in-order issue/dispatch resources don't implement buffers, dispatch
+ /// events coincide with issue events.
+ /// Also, no other instruction can be dispatched/issued while this resource is
+ /// in use. Only when all the "resource cycles" are consumed (after the issue
+ /// event), a new instruction can be dispatched.
+ const int BufferSize;
+
+ /// Available slots in the buffer (zero, if this is not a buffered resource).
+ unsigned AvailableSlots;
+
+ /// This field is set if this resource is currently reserved.
+ ///
+ /// Resources can be reserved for a number of cycles.
+ /// Instructions can still be dispatched to reserved resources. However,
+ /// instructions dispatched to a reserved resource cannot be issued to the
+ /// underlying units (i.e. pipelines) until the resource is released.
+ bool Unavailable;
+
+ const bool IsAGroup;
+
+ /// Checks for the availability of unit 'SubResMask' in the group.
+ bool isSubResourceReady(uint64_t SubResMask) const {
+ return ReadyMask & SubResMask;
+ }
+
+public:
+ ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask);
+
+ unsigned getProcResourceID() const { return ProcResourceDescIndex; }
+ uint64_t getResourceMask() const { return ResourceMask; }
+ uint64_t getReadyMask() const { return ReadyMask; }
+ int getBufferSize() const { return BufferSize; }
+
+ bool isBuffered() const { return BufferSize > 0; }
+ bool isInOrder() const { return BufferSize == 1; }
+
+ /// Returns true if this is an in-order dispatch/issue resource.
+ bool isADispatchHazard() const { return BufferSize == 0; }
+ bool isReserved() const { return Unavailable; }
+
+ void setReserved() { Unavailable = true; }
+ void clearReserved() { Unavailable = false; }
+
+ /// Returns true if this resource is not reserved, and if there are at least
+ /// `NumUnits` available units.
+ bool isReady(unsigned NumUnits = 1) const; + + bool isAResourceGroup() const { return IsAGroup; } + + bool containsResource(uint64_t ID) const { return ResourceMask & ID; } + + void markSubResourceAsUsed(uint64_t ID) { + assert(isSubResourceReady(ID)); + ReadyMask ^= ID; + } + + void releaseSubResource(uint64_t ID) { + assert(!isSubResourceReady(ID)); + ReadyMask ^= ID; + } + + unsigned getNumUnits() const { + return isAResourceGroup() ? 1U : countPopulation(ResourceSizeMask); + } + + /// Checks if there is an available slot in the resource buffer. + /// + /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if + /// there is a slot available. + /// + /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it + /// is reserved. + /// + /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. + ResourceStateEvent isBufferAvailable() const; + + /// Reserve a slot in the buffer. + void reserveBuffer() { + if (AvailableSlots) + AvailableSlots--; + } + + /// Release a slot in the buffer. + void releaseBuffer() { + if (BufferSize > 0) + AvailableSlots++; + assert(AvailableSlots <= static_cast(BufferSize)); + } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// A resource unit identifier. +/// +/// This is used to identify a specific processor resource unit using a pair +/// of indices where the 'first' index is a processor resource mask, and the +/// 'second' index is an index for a "sub-resource" (i.e. unit). +typedef std::pair ResourceRef; + +// First: a MCProcResourceDesc index identifying a buffered resource. +// Second: max number of buffer entries used in this resource. +typedef std::pair BufferUsageEntry; + +/// A resource manager for processor resource units and groups. +/// +/// This class owns all the ResourceState objects, and it is responsible for +/// acting on requests from a Scheduler by updating the internal state of +/// ResourceState objects. 
+/// This class doesn't know about instruction itineraries and functional units. +/// In future, it can be extended to support itineraries too through the same +/// public interface. +class ResourceManager { + // The resource manager owns all the ResourceState. + std::vector> Resources; + std::vector> Strategies; + + // Keeps track of which resources are busy, and how many cycles are left + // before those become usable again. + SmallDenseMap BusyResources; + + // A table to map processor resource IDs to processor resource masks. + SmallVector ProcResID2Mask; + + // Returns the actual resource unit that will be used. + ResourceRef selectPipe(uint64_t ResourceID); + + void use(const ResourceRef &RR); + void release(const ResourceRef &RR); + + unsigned getNumUnits(uint64_t ResourceID) const; + + // Overrides the selection strategy for the processor resource with the given + // mask. + void setCustomStrategyImpl(std::unique_ptr S, + uint64_t ResourceMask); + +public: + ResourceManager(const MCSchedModel &SM); + virtual ~ResourceManager() = default; + + // Overrides the selection strategy for the resource at index ResourceID in + // the MCProcResourceDesc table. + void setCustomStrategy(std::unique_ptr S, + unsigned ResourceID) { + assert(ResourceID < ProcResID2Mask.size() && + "Invalid resource index in input!"); + return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); + } + + // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if + // there are enough available slots in the buffers. + ResourceStateEvent canBeDispatched(ArrayRef Buffers) const; + + // Return the processor resource identifier associated to this Mask. + unsigned resolveResourceMask(uint64_t Mask) const; + + // Consume a slot in every buffered resource from array 'Buffers'. Resource + // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. 
+ void reserveBuffers(ArrayRef Buffers); + + // Release buffer entries previously allocated by method reserveBuffers. + void releaseBuffers(ArrayRef Buffers); + + // Reserve a processor resource. A reserved resource is not available for + // instruction issue until it is released. + void reserveResource(uint64_t ResourceID); + + // Release a previously reserved processor resource. + void releaseResource(uint64_t ResourceID); + + // Returns true if all resources are in-order, and there is at least one + // resource which is a dispatch hazard (BufferSize = 0). + bool mustIssueImmediately(const InstrDesc &Desc) const; + + bool canBeIssued(const InstrDesc &Desc) const; + + void issueInstruction( + const InstrDesc &Desc, + SmallVectorImpl> &Pipes); + + void cycleEvent(SmallVectorImpl &ResourcesFreed); + +#ifndef NDEBUG + void dump() const { + for (const std::unique_ptr &Resource : Resources) + Resource->dump(); + } +#endif +}; +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_RESOURCE_MANAGER_H Index: llvm/trunk/include/llvm/MCA/HardwareUnits/RetireControlUnit.h =================================================================== --- llvm/trunk/include/llvm/MCA/HardwareUnits/RetireControlUnit.h +++ llvm/trunk/include/llvm/MCA/HardwareUnits/RetireControlUnit.h @@ -0,0 +1,104 @@ +//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_RETIRE_CONTROL_UNIT_H +#define LLVM_MCA_RETIRE_CONTROL_UNIT_H + +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" +#include "llvm/MCA/Instruction.h" +#include + +namespace llvm { +namespace mca { + +/// This class tracks which instructions are in-flight (i.e., dispatched but not +/// retired) in the OoO backend. +// +/// This class checks on every cycle if/which instructions can be retired. +/// Instructions are retired in program order. +/// In the event of an instruction being retired, the pipeline that owns +/// this RetireControlUnit (RCU) gets notified. +/// +/// On instruction retired, register updates are all architecturally +/// committed, and any physicall registers previously allocated for the +/// retired instruction are freed. +struct RetireControlUnit : public HardwareUnit { + // A RUToken is created by the RCU for every instruction dispatched to the + // schedulers. These "tokens" are managed by the RCU in its token Queue. + // + // On every cycle ('cycleEvent'), the RCU iterates through the token queue + // looking for any token with its 'Executed' flag set. If a token has that + // flag set, then the instruction has reached the write-back stage and will + // be retired by the RCU. + // + // 'NumSlots' represents the number of entries consumed by the instruction in + // the reorder buffer. Those entries will become available again once the + // instruction is retired. + // + // Note that the size of the reorder buffer is defined by the scheduling + // model via field 'NumMicroOpBufferSize'. + struct RUToken { + InstRef IR; + unsigned NumSlots; // Slots reserved to this instruction. + bool Executed; // True if the instruction is past the WB stage. + }; + +private: + unsigned NextAvailableSlotIdx; + unsigned CurrentInstructionSlotIdx; + unsigned AvailableSlots; + unsigned MaxRetirePerCycle; // 0 means no limit. 
+ std::vector Queue; + +public: + RetireControlUnit(const MCSchedModel &SM); + + bool isEmpty() const { return AvailableSlots == Queue.size(); } + bool isAvailable(unsigned Quantity = 1) const { + // Some instructions may declare a number of uOps which exceeds the size + // of the reorder buffer. To avoid problems, cap the amount of slots to + // the size of the reorder buffer. + Quantity = std::min(Quantity, static_cast(Queue.size())); + + // Further normalize the number of micro opcodes for instructions that + // declare zero opcodes. This should match the behavior of method + // reserveSlot(). + Quantity = std::max(Quantity, 1U); + return AvailableSlots >= Quantity; + } + + unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } + + // Reserves a number of slots, and returns a new token. + unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); + + // Return the current token from the RCU's circular token queue. + const RUToken &peekCurrentToken() const; + + // Advance the pointer to the next token in the circular token queue. + void consumeCurrentToken(); + + // Update the RCU token to represent the executed state. + void onInstructionExecuted(unsigned TokenID); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_RETIRE_CONTROL_UNIT_H Index: llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h =================================================================== --- llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h +++ llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h @@ -0,0 +1,214 @@ +//===--------------------- Scheduler.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A scheduler for Processor Resource Units and Processor Resource Groups. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_SCHEDULER_H +#define LLVM_MCA_SCHEDULER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" +#include "llvm/MCA/HardwareUnits/LSUnit.h" +#include "llvm/MCA/HardwareUnits/ResourceManager.h" +#include "llvm/MCA/Support.h" + +namespace llvm { +namespace mca { + +class SchedulerStrategy { +public: + SchedulerStrategy() = default; + virtual ~SchedulerStrategy(); + + /// Returns true if Lhs should take priority over Rhs. + /// + /// This method is used by class Scheduler to select the "best" ready + /// instruction to issue to the underlying pipelines. + virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0; +}; + +/// Default instruction selection strategy used by class Scheduler. +class DefaultSchedulerStrategy : public SchedulerStrategy { + /// This method ranks instructions based on their age, and the number of known + /// users. The lower the rank value, the better. + int computeRank(const InstRef &Lhs) const { + return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers(); + } + +public: + DefaultSchedulerStrategy() = default; + virtual ~DefaultSchedulerStrategy(); + + bool compare(const InstRef &Lhs, const InstRef &Rhs) const override { + int LhsRank = computeRank(Lhs); + int RhsRank = computeRank(Rhs); + + /// Prioritize older instructions over younger instructions to minimize the + /// pressure on the reorder buffer. + if (LhsRank == RhsRank) + return Lhs.getSourceIndex() < Rhs.getSourceIndex(); + return LhsRank < RhsRank; + } +}; + +/// Class Scheduler is responsible for issuing instructions to pipeline +/// resources. 
+/// +/// Internally, it delegates to a ResourceManager the management of processor +/// resources. This class is also responsible for tracking the progress of +/// instructions from the dispatch stage, until the write-back stage. +/// +/// An instruction dispatched to the Scheduler is initially placed into either +/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input +/// operands. +/// +/// An instruction is moved from the WaitSet to the ReadySet when register +/// operands become available, and all memory dependencies are met. +/// Instructions that are moved from the WaitSet to the ReadySet transition +/// in state from 'IS_AVAILABLE' to 'IS_READY'. +/// +/// On every cycle, the Scheduler checks if it can promote instructions from the +/// WaitSet to the ReadySet. +/// +/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued +/// to a (one or more) pipeline(s). This event also causes an instruction state +/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction +/// leaves the IssuedSet when it reaches the write-back stage. +class Scheduler : public HardwareUnit { + LSUnit &LSU; + + // Instruction selection strategy for this Scheduler. + std::unique_ptr Strategy; + + // Hardware resources that are managed by this scheduler. + std::unique_ptr Resources; + + std::vector WaitSet; + std::vector ReadySet; + std::vector IssuedSet; + + /// Verify the given selection strategy and set the Strategy member + /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is + /// used. + void initializeStrategy(std::unique_ptr S); + + /// Issue an instruction without updating the ready queue. + void issueInstructionImpl( + InstRef &IR, + SmallVectorImpl> &Pipes); + + // Identify instructions that have finished executing, and remove them from + // the IssuedSet. References to executed instructions are added to input + // vector 'Executed'. 
+ void updateIssuedSet(SmallVectorImpl &Executed); + + // Try to promote instructions from WaitSet to ReadySet. + // Add promoted instructions to the 'Ready' vector in input. + void promoteToReadySet(SmallVectorImpl &Ready); + +public: + Scheduler(const MCSchedModel &Model, LSUnit &Lsu) + : Scheduler(Model, Lsu, nullptr) {} + + Scheduler(const MCSchedModel &Model, LSUnit &Lsu, + std::unique_ptr SelectStrategy) + : Scheduler(make_unique(Model), Lsu, + std::move(SelectStrategy)) {} + + Scheduler(std::unique_ptr RM, LSUnit &Lsu, + std::unique_ptr SelectStrategy) + : LSU(Lsu), Resources(std::move(RM)) { + initializeStrategy(std::move(SelectStrategy)); + } + + // Stalls generated by the scheduler. + enum Status { + SC_AVAILABLE, + SC_LOAD_QUEUE_FULL, + SC_STORE_QUEUE_FULL, + SC_BUFFERS_FULL, + SC_DISPATCH_GROUP_STALL, + }; + + /// Check if the instruction in 'IR' can be dispatched and returns an answer + /// in the form of a Status value. + /// + /// The DispatchStage is responsible for querying the Scheduler before + /// dispatching new instructions. This routine is used for performing such + /// a query. If the instruction 'IR' can be dispatched, then true is + /// returned, otherwise false is returned with Event set to the stall type. + /// Internally, it also checks if the load/store unit is available. + Status isAvailable(const InstRef &IR) const; + + /// Reserves buffer and LSUnit queue resources that are necessary to issue + /// this instruction. + /// + /// Returns true if instruction IR is ready to be issued to the underlying + /// pipelines. Note that this operation cannot fail; it assumes that a + /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`. + void dispatch(const InstRef &IR); + + /// Returns true if IR is ready to be executed by the underlying pipelines. + /// This method assumes that IR has been previously dispatched. 
+ bool isReady(const InstRef &IR) const; + + /// Issue an instruction and populates a vector of used pipeline resources, + /// and a vector of instructions that transitioned to the ready state as a + /// result of this event. + void issueInstruction( + InstRef &IR, + SmallVectorImpl> &Used, + SmallVectorImpl &Ready); + + /// Returns true if IR has to be issued immediately, or if IR is a zero + /// latency instruction. + bool mustIssueImmediately(const InstRef &IR) const; + + /// This routine notifies the Scheduler that a new cycle just started. + /// + /// It notifies the underlying ResourceManager that a new cycle just started. + /// Vector `Freed` is populated with resourceRef related to resources that + /// have changed in state, and that are now available to new instructions. + /// Instructions executed are added to vector Executed, while vector Ready is + /// populated with instructions that have become ready in this new cycle. + void cycleEvent(SmallVectorImpl &Freed, + SmallVectorImpl &Ready, + SmallVectorImpl &Executed); + + /// Convert a resource mask into a valid llvm processor resource identifier. + unsigned getResourceID(uint64_t Mask) const { + return Resources->resolveResourceMask(Mask); + } + + /// Select the next instruction to issue from the ReadySet. Returns an invalid + /// instruction reference if there are no ready instructions, or if processor + /// resources are not available. + InstRef select(); + +#ifndef NDEBUG + // Update the ready queues. + void dump() const; + + // This routine performs a sanity check. This routine should only be called + // when we know that 'IR' is not in the scheduler's instruction queues. 
+ void sanityCheck(const InstRef &IR) const { + assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!"); + assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!"); + assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!"); + } +#endif // !NDEBUG +}; +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_SCHEDULER_H Index: llvm/trunk/include/llvm/MCA/InstrBuilder.h =================================================================== --- llvm/trunk/include/llvm/MCA/InstrBuilder.h +++ llvm/trunk/include/llvm/MCA/InstrBuilder.h @@ -0,0 +1,77 @@ +//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A builder class for instructions that are statically analyzed by llvm-mca. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_INSTRBUILDER_H +#define LLVM_MCA_INSTRBUILDER_H + +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace mca { + +/// A builder class that knows how to construct Instruction objects. +/// +/// Every llvm-mca Instruction is described by an object of class InstrDesc. +/// An InstrDesc describes which registers are read/written by the instruction, +/// as well as the instruction latency and hardware resources consumed. +/// +/// This class is used by the tool to construct Instructions and instruction +/// descriptors (i.e. InstrDesc objects). 
+/// Information from the machine scheduling model is used to identify processor +/// resources that are consumed by an instruction. +class InstrBuilder { + const MCSubtargetInfo &STI; + const MCInstrInfo &MCII; + const MCRegisterInfo &MRI; + const MCInstrAnalysis &MCIA; + SmallVector ProcResourceMasks; + + DenseMap> Descriptors; + DenseMap> VariantDescriptors; + + bool FirstCallInst; + bool FirstReturnInst; + + Expected createInstrDescImpl(const MCInst &MCI); + Expected getOrCreateInstrDesc(const MCInst &MCI); + + InstrBuilder(const InstrBuilder &) = delete; + InstrBuilder &operator=(const InstrBuilder &) = delete; + + void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID); + void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID); + Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const; + +public: + InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII, + const MCRegisterInfo &RI, const MCInstrAnalysis &IA); + + void clear() { + VariantDescriptors.shrink_and_clear(); + FirstCallInst = true; + FirstReturnInst = true; + } + + Expected> createInstruction(const MCInst &MCI); +}; +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_INSTRBUILDER_H Index: llvm/trunk/include/llvm/MCA/Instruction.h =================================================================== --- llvm/trunk/include/llvm/MCA/Instruction.h +++ llvm/trunk/include/llvm/MCA/Instruction.h @@ -0,0 +1,542 @@ +//===--------------------- Instruction.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines abstractions used by the Pipeline to model register reads, +/// register writes and instructions. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_INSTRUCTION_H +#define LLVM_MCA_INSTRUCTION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MathExtras.h" + +#ifndef NDEBUG +#include "llvm/Support/raw_ostream.h" +#endif + +#include + +namespace llvm { +namespace mca { + +constexpr int UNKNOWN_CYCLES = -512; + +/// A register write descriptor. +struct WriteDescriptor { + // Operand index. The index is negative for implicit writes only. + // For implicit writes, the actual operand index is computed performing + // a bitwise not of the OpIndex. + int OpIndex; + // Write latency. Number of cycles before write-back stage. + unsigned Latency; + // This field is set to a value different than zero only if this + // is an implicit definition. + unsigned RegisterID; + // Instruction itineraries would set this field to the SchedClass ID. + // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry + // element associated to this write. + // When computing read latencies, this value is matched against the + // "ReadAdvance" information. The hardware backend may implement + // dedicated forwarding paths to quickly propagate write results to dependent + // instructions waiting in the reservation station (effectively bypassing the + // write-back stage). + unsigned SClassOrWriteResourceID; + // True only if this is a write obtained from an optional definition. + // Optional definitions are allowed to reference regID zero (i.e. "no + // register"). + bool IsOptionalDef; + + bool isImplicitWrite() const { return OpIndex < 0; }; +}; + +/// A register read descriptor. +struct ReadDescriptor { + // A MCOperand index. This is used by the Dispatch logic to identify register + // reads. Implicit reads have negative indices. The actual operand index of an + // implicit read is the bitwise not of field OpIndex. 
+ int OpIndex;
+ // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit
+ // uses always come first in the sequence of uses.
+ unsigned UseIndex;
+ // This field is only set if this is an implicit read.
+ unsigned RegisterID;
+ // Scheduling Class Index. It is used to query the scheduling model for the
+ // MCSchedClassDesc object.
+ unsigned SchedClassID;
+
+ bool isImplicitRead() const { return OpIndex < 0; };
+};
+
+class ReadState;
+
+/// Tracks uses of a register definition (e.g. register write).
+///
+/// Each implicit/explicit register write is associated with an instance of
+/// this class. A WriteState object tracks the dependent users of a
+/// register write. It also tracks how many cycles are left before the write
+/// back stage.
+class WriteState {
+ const WriteDescriptor *WD;
+ // On instruction issue, this field is set equal to the write latency.
+ // Before instruction issue, this field defaults to -512, a special
+ // value that represents an "unknown" number of cycles.
+ int CyclesLeft;
+
+ // Actual register defined by this write. This field is only used
+ // to speedup queries on the register file.
+ // For implicit writes, this field always matches the value of
+ // field RegisterID from WD.
+ unsigned RegisterID;
+
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
+
+ // True if this write implicitly clears the upper portion of RegisterID's
+ // super-registers.
+ bool ClearsSuperRegs;
+
+ // True if this write is from a dependency breaking zero-idiom instruction.
+ bool WritesZero;
+
+ // True if this write has been eliminated at register renaming stage.
+ // Example: a register move doesn't consume scheduler/pipeline resources if
+ // it is eliminated at register renaming stage. It still consumes
+ // decode bandwidth, and ROB entries.
+ bool IsEliminated; + + // This field is set if this is a partial register write, and it has a false + // dependency on any previous write of the same register (or a portion of it). + // DependentWrite must be able to complete before this write completes, so + // that we don't break the WAW, and the two writes can be merged together. + const WriteState *DependentWrite; + + // A partial write that is in a false dependency with this write. + WriteState *PartialWrite; + + unsigned DependentWriteCyclesLeft; + + // A list of dependent reads. Users is a set of dependent + // reads. A dependent read is added to the set only if CyclesLeft + // is "unknown". As soon as CyclesLeft is 'known', each user in the set + // gets notified with the actual CyclesLeft. + + // The 'second' element of a pair is a "ReadAdvance" number of cycles. + SmallVector, 4> Users; + +public: + WriteState(const WriteDescriptor &Desc, unsigned RegID, + bool clearsSuperRegs = false, bool writesZero = false) + : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), + PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), + IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr), + DependentWriteCyclesLeft(0) {} + + WriteState(const WriteState &Other) = default; + WriteState &operator=(const WriteState &Other) = default; + + int getCyclesLeft() const { return CyclesLeft; } + unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; } + unsigned getRegisterID() const { return RegisterID; } + unsigned getRegisterFileID() const { return PRFID; } + unsigned getLatency() const { return WD->Latency; } + + void addUser(ReadState *Use, int ReadAdvance); + void addUser(WriteState *Use); + + unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; } + + unsigned getNumUsers() const { + unsigned NumUsers = Users.size(); + if (PartialWrite) + ++NumUsers; + return NumUsers; + } + + bool clearsSuperRegisters() const { return ClearsSuperRegs; } + 
bool isWriteZero() const { return WritesZero; } + bool isEliminated() const { return IsEliminated; } + bool isExecuted() const { + return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0; + } + + const WriteState *getDependentWrite() const { return DependentWrite; } + void setDependentWrite(WriteState *Other) { DependentWrite = Other; } + void writeStartEvent(unsigned Cycles) { + DependentWriteCyclesLeft = Cycles; + DependentWrite = nullptr; + } + + void setWriteZero() { WritesZero = true; } + void setEliminated() { + assert(Users.empty() && "Write is in an inconsistent state."); + CyclesLeft = 0; + IsEliminated = true; + } + + void setPRF(unsigned PRF) { PRFID = PRF; } + + // On every cycle, update CyclesLeft and notify dependent users. + void cycleEvent(); + void onInstructionIssued(); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// Tracks register operand latency in cycles. +/// +/// A read may be dependent on more than one write. This occurs when some +/// writes only partially update the register associated to this read. +class ReadState { + const ReadDescriptor *RD; + // Physical register identified associated to this read. + unsigned RegisterID; + // Physical register file that serves register RegisterID. + unsigned PRFID; + // Number of writes that contribute to the definition of RegisterID. + // In the absence of partial register updates, the number of DependentWrites + // cannot be more than one. + unsigned DependentWrites; + // Number of cycles left before RegisterID can be read. This value depends on + // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES. + // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of + // every dependent write is known. + int CyclesLeft; + // This field is updated on every writeStartEvent(). When the number of + // dependent writes (i.e. field DependentWrite) is zero, this value is + // propagated to field CyclesLeft. 
+ unsigned TotalCycles; + // This field is set to true only if there are no dependent writes, and + // there are no `CyclesLeft' to wait. + bool IsReady; + // True if this is a read from a known zero register. + bool IsZero; + // True if this register read is from a dependency-breaking instruction. + bool IndependentFromDef; + +public: + ReadState(const ReadDescriptor &Desc, unsigned RegID) + : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0), + CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true), + IsZero(false), IndependentFromDef(false) {} + + const ReadDescriptor &getDescriptor() const { return *RD; } + unsigned getSchedClass() const { return RD->SchedClassID; } + unsigned getRegisterID() const { return RegisterID; } + unsigned getRegisterFileID() const { return PRFID; } + + bool isReady() const { return IsReady; } + bool isImplicitRead() const { return RD->isImplicitRead(); } + + bool isIndependentFromDef() const { return IndependentFromDef; } + void setIndependentFromDef() { IndependentFromDef = true; } + + void cycleEvent(); + void writeStartEvent(unsigned Cycles); + void setDependentWrites(unsigned Writes) { + DependentWrites = Writes; + IsReady = !Writes; + } + + bool isReadZero() const { return IsZero; } + void setReadZero() { IsZero = true; } + void setPRF(unsigned ID) { PRFID = ID; } +}; + +/// A sequence of cycles. +/// +/// This class can be used as a building block to construct ranges of cycles. +class CycleSegment { + unsigned Begin; // Inclusive. + unsigned End; // Exclusive. + bool Reserved; // Resources associated to this segment must be reserved. 
+ +public: + CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false) + : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {} + + bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; } + bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; } + bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; } + bool overlaps(const CycleSegment &CS) const { + return !startsAfter(CS) && !endsBefore(CS); + } + bool isExecuting() const { return Begin == 0 && End != 0; } + bool isExecuted() const { return End == 0; } + bool operator<(const CycleSegment &Other) const { + return Begin < Other.Begin; + } + CycleSegment &operator--(void) { + if (Begin) + Begin--; + if (End) + End--; + return *this; + } + + bool isValid() const { return Begin <= End; } + unsigned size() const { return End - Begin; }; + void subtract(unsigned Cycles) { + assert(End >= Cycles); + End -= Cycles; + } + + unsigned begin() const { return Begin; } + unsigned end() const { return End; } + void setEnd(unsigned NewEnd) { End = NewEnd; } + bool isReserved() const { return Reserved; } + void setReserved() { Reserved = true; } +}; + +/// Helper used by class InstrDesc to describe how hardware resources +/// are used. +/// +/// This class describes how many resource units of a specific resource kind +/// (and how many cycles) are "used" by an instruction. +struct ResourceUsage { + CycleSegment CS; + unsigned NumUnits; + ResourceUsage(CycleSegment Cycles, unsigned Units = 1) + : CS(Cycles), NumUnits(Units) {} + unsigned size() const { return CS.size(); } + bool isReserved() const { return CS.isReserved(); } + void setReserved() { CS.setReserved(); } +}; + +/// An instruction descriptor +struct InstrDesc { + SmallVector Writes; // Implicit writes are at the end. + SmallVector Reads; // Implicit reads are at the end. 
+ + // For every resource used by an instruction of this kind, this vector + // reports the number of "consumed cycles". + SmallVector, 4> Resources; + + // A list of buffered resources consumed by this instruction. + SmallVector Buffers; + + unsigned MaxLatency; + // Number of MicroOps for this instruction. + unsigned NumMicroOps; + + bool MayLoad; + bool MayStore; + bool HasSideEffects; + + // A zero latency instruction doesn't consume any scheduler resources. + bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } + + InstrDesc() = default; + InstrDesc(const InstrDesc &Other) = delete; + InstrDesc &operator=(const InstrDesc &Other) = delete; +}; + +/// Base class for instructions consumed by the simulation pipeline. +/// +/// This class tracks data dependencies as well as generic properties +/// of the instruction. +class InstructionBase { + const InstrDesc &Desc; + + // This field is set for instructions that are candidates for move + // elimination. For more information about move elimination, see the + // definition of RegisterMappingTracker in RegisterFile.h + bool IsOptimizableMove; + + // Output dependencies. + // One entry per each implicit and explicit register definition. + SmallVector Defs; + + // Input dependencies. + // One entry per each implicit and explicit register use. 
+ SmallVector Uses; + +public: + InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {} + + SmallVectorImpl &getDefs() { return Defs; } + const ArrayRef getDefs() const { return Defs; } + SmallVectorImpl &getUses() { return Uses; } + const ArrayRef getUses() const { return Uses; } + const InstrDesc &getDesc() const { return Desc; } + + unsigned getLatency() const { return Desc.MaxLatency; } + + bool hasDependentUsers() const { + return any_of(Defs, + [](const WriteState &Def) { return Def.getNumUsers() > 0; }); + } + + unsigned getNumUsers() const { + unsigned NumUsers = 0; + for (const WriteState &Def : Defs) + NumUsers += Def.getNumUsers(); + return NumUsers; + } + + // Returns true if this instruction is a candidate for move elimination. + bool isOptimizableMove() const { return IsOptimizableMove; } + void setOptimizableMove() { IsOptimizableMove = true; } +}; + +/// An instruction propagated through the simulated instruction pipeline. +/// +/// This class is used to monitor changes to the internal state of instructions +/// that are sent to the various components of the simulated hardware pipeline. +class Instruction : public InstructionBase { + enum InstrStage { + IS_INVALID, // Instruction in an invalid state. + IS_AVAILABLE, // Instruction dispatched but operands are not ready. + IS_READY, // Instruction dispatched and operands ready. + IS_EXECUTING, // Instruction issued. + IS_EXECUTED, // Instruction executed. Values are written back. + IS_RETIRED // Instruction retired. + }; + + // The current instruction stage. + enum InstrStage Stage; + + // This value defaults to the instruction latency. This instruction is + // considered executed when field CyclesLeft goes to zero. + int CyclesLeft; + + // Retire Unit token ID for this instruction. 
+ unsigned RCUTokenID; + +public: + Instruction(const InstrDesc &D) + : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), + RCUTokenID(0) {} + + unsigned getRCUTokenID() const { return RCUTokenID; } + int getCyclesLeft() const { return CyclesLeft; } + + // Transition to the dispatch stage, and assign a RCUToken to this + // instruction. The RCUToken is used to track the completion of every + // register write performed by this instruction. + void dispatch(unsigned RCUTokenID); + + // Instruction issued. Transition to the IS_EXECUTING state, and update + // all the definitions. + void execute(); + + // Force a transition from the IS_AVAILABLE state to the IS_READY state if + // input operands are all ready. State transitions normally occur at the + // beginning of a new cycle (see method cycleEvent()). However, the scheduler + // may decide to promote instructions from the wait queue to the ready queue + // as the result of another issue event. This method is called every time the + // instruction might have changed in state. + void update(); + + bool isDispatched() const { return Stage == IS_AVAILABLE; } + bool isReady() const { return Stage == IS_READY; } + bool isExecuting() const { return Stage == IS_EXECUTING; } + bool isExecuted() const { return Stage == IS_EXECUTED; } + bool isRetired() const { return Stage == IS_RETIRED; } + + bool isEliminated() const { + return isReady() && getDefs().size() && + all_of(getDefs(), + [](const WriteState &W) { return W.isEliminated(); }); + } + + // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED. + void forceExecuted(); + + void retire() { + assert(isExecuted() && "Instruction is in an invalid state!"); + Stage = IS_RETIRED; + } + + void cycleEvent(); +}; + +/// An InstRef contains both a SourceMgr index and Instruction pair. The index +/// is used as a unique identifier for the instruction. MCA will make use of +/// this index as a key throughout MCA. 
+class InstRef {
+  std::pair<unsigned, Instruction *> Data;
+
+public:
+  InstRef() : Data(std::make_pair(0, nullptr)) {}
+  InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
+
+  bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+
+  unsigned getSourceIndex() const { return Data.first; }
+  Instruction *getInstruction() { return Data.second; }
+  const Instruction *getInstruction() const { return Data.second; }
+
+  /// Returns true if this references a valid instruction.
+  operator bool() const { return Data.second != nullptr; }
+
+  /// Invalidate this reference.
+  void invalidate() { Data.second = nullptr; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const { OS << getSourceIndex(); }
+#endif
+};
+
+#ifndef NDEBUG
+inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) {
+  IR.print(OS);
+  return OS;
+}
+#endif
+
+/// A reference to a register write.
+///
+/// This class is mainly used by the register file to describe register
+/// mappings. It correlates a register write to the source index of the
+/// defining instruction.
+class WriteRef {
+  std::pair<unsigned, WriteState *> Data;
+  static const unsigned INVALID_IID;
+
+public:
+  WriteRef() : Data(INVALID_IID, nullptr) {}
+  WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {}
+
+  unsigned getSourceIndex() const { return Data.first; }
+  const WriteState *getWriteState() const { return Data.second; }
+  WriteState *getWriteState() { return Data.second; }
+  void invalidate() { Data.second = nullptr; }
+  bool isWriteZero() const {
+    assert(isValid() && "Invalid null WriteState found!");
+    return getWriteState()->isWriteZero();
+  }
+
+  /// Returns true if this register write has been executed, and the new
+  /// register value is therefore available to users.
+ bool isAvailable() const { + if (getSourceIndex() == INVALID_IID) + return false; + const WriteState *WS = getWriteState(); + return !WS || WS->isExecuted(); + } + + bool isValid() const { return Data.first != INVALID_IID && Data.second; } + bool operator==(const WriteRef &Other) const { return Data == Other.Data; } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_INSTRUCTION_H Index: llvm/trunk/include/llvm/MCA/Pipeline.h =================================================================== --- llvm/trunk/include/llvm/MCA/Pipeline.h +++ llvm/trunk/include/llvm/MCA/Pipeline.h @@ -0,0 +1,79 @@ +//===--------------------- Pipeline.h ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_PIPELINE_H +#define LLVM_MCA_PIPELINE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/MCA/Stages/Stage.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace mca { + +class HWEventListener; + +/// A pipeline for a specific subtarget. +/// +/// It emulates an out-of-order execution of instructions. Instructions are +/// fetched from a MCInst sequence managed by an initial 'Fetch' stage. +/// Instructions are firstly fetched, then dispatched to the schedulers, and +/// then executed. 
+///
+/// This class tracks the lifetime of an instruction from the moment where
+/// it gets dispatched to the schedulers, to the moment where it finishes
+/// executing and register writes are architecturally committed.
+/// In particular, it monitors changes in the state of every instruction
+/// in flight.
+///
+/// Instructions are executed in a loop of iterations. The number of iterations
+/// is defined by the SourceMgr object, which is managed by the initial stage
+/// of the instruction pipeline.
+///
+/// The Pipeline entry point is method 'run()' which executes cycles in a loop
+/// until there are new instructions to dispatch, and not every instruction
+/// has been retired.
+///
+/// Internally, the Pipeline collects statistical information in the form of
+/// histograms. For example, it tracks how the dispatch group size changes
+/// over time.
+class Pipeline {
+  Pipeline(const Pipeline &P) = delete;
+  Pipeline &operator=(const Pipeline &P) = delete;
+
+  /// An ordered list of stages that define this instruction pipeline.
+  SmallVector<std::unique_ptr<Stage>, 8> Stages;
+  std::set<HWEventListener *> Listeners;
+  unsigned Cycles;
+
+  Error runCycle();
+  bool hasWorkToProcess();
+  void notifyCycleBegin();
+  void notifyCycleEnd();
+
+public:
+  Pipeline() : Cycles(0) {}
+  void appendStage(std::unique_ptr<Stage> S);
+
+  /// Returns the total number of simulated cycles.
+  Expected<unsigned> run();
+
+  void addEventListener(HWEventListener *Listener);
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_PIPELINE_H
Index: llvm/trunk/include/llvm/MCA/SourceMgr.h
===================================================================
--- llvm/trunk/include/llvm/MCA/SourceMgr.h
+++ llvm/trunk/include/llvm/MCA/SourceMgr.h
@@ -0,0 +1,57 @@
+//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements class SourceMgr. Class SourceMgr abstracts the input
+/// code sequence (a sequence of MCInst), and assigns unique identifiers to
+/// every instruction in the sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SOURCEMGR_H
+#define LLVM_MCA_SOURCEMGR_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace mca {
+
+class Instruction;
+
+typedef std::pair<unsigned, const Instruction &> SourceRef;
+
+class SourceMgr {
+  using UniqueInst = std::unique_ptr<Instruction>;
+  ArrayRef<UniqueInst> Sequence;
+  unsigned Current;
+  const unsigned Iterations;
+  static const unsigned DefaultIterations = 100;
+
+public:
+  SourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
+      : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
+
+  unsigned getNumIterations() const { return Iterations; }
+  unsigned size() const { return Sequence.size(); }
+  bool hasNext() const { return Current < (Iterations * Sequence.size()); }
+  void updateNext() { ++Current; }
+
+  SourceRef peekNext() const {
+    assert(hasNext() && "Already at end of sequence!");
+    return SourceRef(Current, *Sequence[Current % Sequence.size()]);
+  }
+
+  using const_iterator = ArrayRef<UniqueInst>::const_iterator;
+  const_iterator begin() const { return Sequence.begin(); }
+  const_iterator end() const { return Sequence.end(); }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SOURCEMGR_H
Index: llvm/trunk/include/llvm/MCA/Stages/DispatchStage.h
===================================================================
--- llvm/trunk/include/llvm/MCA/Stages/DispatchStage.h
+++ llvm/trunk/include/llvm/MCA/Stages/DispatchStage.h
@@ -0,0 +1,93 @@
+//===----------------------- DispatchStage.h --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. +/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_DISPATCH_STAGE_H +#define LLVM_MCA_DISPATCH_STAGE_H + +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Stages/Stage.h" + +namespace llvm { +namespace mca { + +// Implements the hardware dispatch logic. +// +// This class is responsible for the dispatch stage, in which instructions are +// dispatched in groups to the Scheduler. An instruction can be dispatched if +// the following conditions are met: +// 1) There are enough entries in the reorder buffer (see class +// RetireControlUnit) to write the opcodes associated with the instruction. +// 2) There are enough physical registers to rename output register operands. +// 3) There are enough entries available in the used buffered resource(s). +// +// The number of micro opcodes that can be dispatched in one cycle is limited by +// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when +// processor resources are not available. Dispatch stall events are counted +// during the entire execution of the code, and displayed by the performance +// report when flag '-dispatch-stats' is specified. +// +// If the number of micro opcodes exceedes DispatchWidth, then the instruction +// is dispatched in multiple cycles. 
+class DispatchStage final : public Stage { + unsigned DispatchWidth; + unsigned AvailableEntries; + unsigned CarryOver; + InstRef CarriedOver; + const MCSubtargetInfo &STI; + RetireControlUnit &RCU; + RegisterFile &PRF; + + bool checkRCU(const InstRef &IR) const; + bool checkPRF(const InstRef &IR) const; + bool canDispatch(const InstRef &IR) const; + Error dispatch(InstRef IR); + + void updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI); + + void notifyInstructionDispatched(const InstRef &IR, + ArrayRef UsedPhysRegs, + unsigned uOps) const; + +public: + DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI, + unsigned MaxDispatchWidth, RetireControlUnit &R, + RegisterFile &F) + : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), + CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {} + + bool isAvailable(const InstRef &IR) const override; + + // The dispatch logic internally doesn't buffer instructions. So there is + // never work to do at the beginning of every cycle. + bool hasWorkToComplete() const override { return false; } + Error cycleStart() override; + Error execute(InstRef &IR) override; + +#ifndef NDEBUG + void dump() const; +#endif +}; +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_DISPATCH_STAGE_H Index: llvm/trunk/include/llvm/MCA/Stages/EntryStage.h =================================================================== --- llvm/trunk/include/llvm/MCA/Stages/EntryStage.h +++ llvm/trunk/include/llvm/MCA/Stages/EntryStage.h @@ -0,0 +1,52 @@ +//===---------------------- EntryStage.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Entry stage of an instruction pipeline. 
Its sole +/// purpose in life is to pick instructions in sequence and move them to the +/// next pipeline stage. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_ENTRY_STAGE_H +#define LLVM_MCA_ENTRY_STAGE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MCA/SourceMgr.h" +#include "llvm/MCA/Stages/Stage.h" + +namespace llvm { +namespace mca { + +class EntryStage final : public Stage { + InstRef CurrentInstruction; + SmallVector, 16> Instructions; + SourceMgr &SM; + unsigned NumRetired; + + // Updates the program counter, and sets 'CurrentInstruction'. + void getNextInstruction(); + + EntryStage(const EntryStage &Other) = delete; + EntryStage &operator=(const EntryStage &Other) = delete; + +public: + EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { } + + bool isAvailable(const InstRef &IR) const override; + bool hasWorkToComplete() const override; + Error execute(InstRef &IR) override; + Error cycleStart() override; + Error cycleEnd() override; +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_FETCH_STAGE_H Index: llvm/trunk/include/llvm/MCA/Stages/ExecuteStage.h =================================================================== --- llvm/trunk/include/llvm/MCA/Stages/ExecuteStage.h +++ llvm/trunk/include/llvm/MCA/Stages/ExecuteStage.h @@ -0,0 +1,80 @@ +//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of a default instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_EXECUTE_STAGE_H +#define LLVM_MCA_EXECUTE_STAGE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Stages/Stage.h" + +namespace llvm { +namespace mca { + +class ExecuteStage final : public Stage { + Scheduler &HWS; + + Error issueInstruction(InstRef &IR); + + // Called at the beginning of each cycle to issue already dispatched + // instructions to the underlying pipelines. + Error issueReadyInstructions(); + + // Used to notify instructions eliminated at register renaming stage. + Error handleInstructionEliminated(InstRef &IR); + + ExecuteStage(const ExecuteStage &Other) = delete; + ExecuteStage &operator=(const ExecuteStage &Other) = delete; + +public: + ExecuteStage(Scheduler &S) : Stage(), HWS(S) {} + + // This stage works under the assumption that the Pipeline will eventually + // execute a retire stage. We don't need to check if pipelines and/or + // schedulers have instructions to process, because those instructions are + // also tracked by the retire control unit. That means, + // RetireControlUnit::hasWorkToComplete() is responsible for checking if there + // are still instructions in-flight in the out-of-order backend. + bool hasWorkToComplete() const override { return false; } + bool isAvailable(const InstRef &IR) const override; + + // Notifies the scheduler that a new cycle just started. + // + // This method notifies the scheduler that a new cycle started. + // This method is also responsible for notifying listeners about instructions + // state changes, and processor resources freed by the scheduler. + // Instructions that transitioned to the 'Executed' state are automatically + // moved to the next stage (i.e. RetireStage). 
+ Error cycleStart() override; + Error execute(InstRef &IR) override; + + void notifyInstructionIssued( + const InstRef &IR, + ArrayRef> Used) const; + void notifyInstructionExecuted(const InstRef &IR) const; + void notifyInstructionReady(const InstRef &IR) const; + void notifyResourceAvailable(const ResourceRef &RR) const; + + // Notify listeners that buffered resources have been consumed or freed. + void notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const; +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_EXECUTE_STAGE_H Index: llvm/trunk/include/llvm/MCA/Stages/InstructionTables.h =================================================================== --- llvm/trunk/include/llvm/MCA/Stages/InstructionTables.h +++ llvm/trunk/include/llvm/MCA/Stages/InstructionTables.h @@ -0,0 +1,45 @@ +//===--------------------- InstructionTables.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a custom stage to generate instruction tables. 
+/// See the description of command-line flag -instruction-tables in +/// docs/CommandGuide/lvm-mca.rst +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_INSTRUCTIONTABLES_H +#define LLVM_MCA_INSTRUCTIONTABLES_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/MCA/Stages/Stage.h" +#include "llvm/MCA/Support.h" + +namespace llvm { +namespace mca { + +class InstructionTables final : public Stage { + const MCSchedModel &SM; + SmallVector, 4> UsedResources; + SmallVector Masks; + +public: + InstructionTables(const MCSchedModel &Model) : Stage(), SM(Model) { + computeProcResourceMasks(Model, Masks); + } + + bool hasWorkToComplete() const override { return false; } + Error execute(InstRef &IR) override; +}; +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_INSTRUCTIONTABLES_H Index: llvm/trunk/include/llvm/MCA/Stages/RetireStage.h =================================================================== --- llvm/trunk/include/llvm/MCA/Stages/RetireStage.h +++ llvm/trunk/include/llvm/MCA/Stages/RetireStage.h @@ -0,0 +1,48 @@ +//===---------------------- RetireStage.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the retire stage of a default instruction pipeline. +/// The RetireStage represents the process logic that interacts with the +/// simulated RetireControlUnit hardware. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_RETIRE_STAGE_H +#define LLVM_MCA_RETIRE_STAGE_H + +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/MCA/Stages/Stage.h" + +namespace llvm { +namespace mca { + +class RetireStage final : public Stage { + // Owner will go away when we move listeners/eventing to the stages. + RetireControlUnit &RCU; + RegisterFile &PRF; + + RetireStage(const RetireStage &Other) = delete; + RetireStage &operator=(const RetireStage &Other) = delete; + +public: + RetireStage(RetireControlUnit &R, RegisterFile &F) + : Stage(), RCU(R), PRF(F) {} + + bool hasWorkToComplete() const override { return !RCU.isEmpty(); } + Error cycleStart() override; + Error execute(InstRef &IR) override; + void notifyInstructionRetired(const InstRef &IR) const; +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_RETIRE_STAGE_H Index: llvm/trunk/include/llvm/MCA/Stages/Stage.h =================================================================== --- llvm/trunk/include/llvm/MCA/Stages/Stage.h +++ llvm/trunk/include/llvm/MCA/Stages/Stage.h @@ -0,0 +1,88 @@ +//===---------------------- Stage.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage. +/// A chain of stages compose an instruction pipeline. 
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_STAGE_H
+#define LLVM_MCA_STAGE_H
+
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Error.h"
+#include <set>
+
+namespace llvm {
+namespace mca {
+
+class InstRef;
+
+class Stage {
+  Stage *NextInSequence;
+  std::set<HWEventListener *> Listeners;
+
+  Stage(const Stage &Other) = delete;
+  Stage &operator=(const Stage &Other) = delete;
+
+protected:
+  const std::set<HWEventListener *> &getListeners() const { return Listeners; }
+
+public:
+  Stage() : NextInSequence(nullptr) {}
+  virtual ~Stage();
+
+  /// Returns true if it can execute IR during this cycle.
+  virtual bool isAvailable(const InstRef &IR) const { return true; }
+
+  /// Returns true if some instructions are still executing this stage.
+  virtual bool hasWorkToComplete() const = 0;
+
+  /// Called once at the start of each cycle. This can be used as a setup
+  /// phase to prepare for the executions during the cycle.
+  virtual Error cycleStart() { return ErrorSuccess(); }
+
+  /// Called once at the end of each cycle.
+  virtual Error cycleEnd() { return ErrorSuccess(); }
+
+  /// The primary action that this stage performs on instruction IR.
+  virtual Error execute(InstRef &IR) = 0;
+
+  void setNextInSequence(Stage *NextStage) {
+    assert(!NextInSequence && "This stage already has a NextInSequence!");
+    NextInSequence = NextStage;
+  }
+
+  bool checkNextStage(const InstRef &IR) const {
+    return NextInSequence && NextInSequence->isAvailable(IR);
+  }
+
+  /// Called when an instruction is ready to move to the next pipeline stage.
+  ///
+  /// Stages are responsible for moving instructions to their immediate
+  /// successor stages.
+  Error moveToTheNextStage(InstRef &IR) {
+    assert(checkNextStage(IR) && "Next stage is not ready!");
+    return NextInSequence->execute(IR);
+  }
+
+  /// Add a listener to receive callbacks during the execution of this stage.
+ void addListener(HWEventListener *Listener); + + /// Notify listeners of a particular hardware event. + template void notifyEvent(const EventT &Event) const { + for (HWEventListener *Listener : Listeners) + Listener->onEvent(Event); + } +}; + +} // namespace mca +} // namespace llvm +#endif // LLVM_MCA_STAGE_H Index: llvm/trunk/include/llvm/MCA/Support.h =================================================================== --- llvm/trunk/include/llvm/MCA/Support.h +++ llvm/trunk/include/llvm/MCA/Support.h @@ -0,0 +1,119 @@ +//===--------------------- Support.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Helper functions used by various pipeline components. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_SUPPORT_H +#define LLVM_MCA_SUPPORT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace mca { + +template +class InstructionError : public ErrorInfo> { +public: + static char ID; + std::string Message; + const T &Inst; + + InstructionError(std::string M, const T &MCI) + : Message(std::move(M)), Inst(MCI) {} + + void log(raw_ostream &OS) const override { OS << Message; } + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } +}; + +template char InstructionError::ID; + +/// This class represents the number of cycles per resource (fractions of +/// cycles). That quantity is managed here as a ratio, and accessed via the +/// double cast-operator below. The two quantities, number of cycles and +/// number of resources, are kept separate. 
This is used by the +/// ResourcePressureView to calculate the average resource cycles +/// per instruction/iteration. +class ResourceCycles { + unsigned Numerator, Denominator; + +public: + ResourceCycles() : Numerator(0), Denominator(1) {} + ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1) + : Numerator(Cycles), Denominator(ResourceUnits) {} + + operator double() const { + assert(Denominator && "Invalid denominator (must be non-zero)."); + return (Denominator == 1) ? Numerator : (double)Numerator / Denominator; + } + + // Add the components of RHS to this instance. Instead of calculating + // the final value here, we keep track of the numerator and denominator + // separately, to reduce floating point error. + ResourceCycles &operator+=(const ResourceCycles &RHS) { + if (Denominator == RHS.Denominator) + Numerator += RHS.Numerator; + else { + // Create a common denominator for LHS and RHS by calculating the least + // common multiple from the GCD. + unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator); + unsigned LCM = (Denominator * RHS.Denominator) / GCD; + unsigned LHSNumerator = Numerator * (LCM / Denominator); + unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator); + Numerator = LHSNumerator + RHSNumerator; + Denominator = LCM; + } + return *this; + } +}; + +/// Populates vector Masks with processor resource masks. +/// +/// The number of bits set in a mask depends on the processor resource type. +/// Each processor resource mask has at least one bit set. For groups, the +/// number of bits set in the mask is equal to the cardinality of the group plus +/// one. Excluding the most significant bit, the remaining bits in the mask +/// identify processor resources that are part of the group. 
+/// +/// Example: +/// +/// ResourceA -- Mask: 0b001 +/// ResourceB -- Mask: 0b010 +/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111 +/// +/// ResourceAB is a processor resource group containing ResourceA and ResourceB. +/// Each resource mask uniquely identifies a resource; both ResourceA and +/// ResourceB only have one bit set. +/// ResourceAB is a group; excluding the most significant bit in the mask, the +/// remaining bits identify the composition of the group. +/// +/// Resource masks are used by the ResourceManager to solve set membership +/// problems with simple bit manipulation operations. +void computeProcResourceMasks(const MCSchedModel &SM, + SmallVectorImpl &Masks); + +/// Compute the reciprocal block throughput from a set of processor resource +/// cycles. The reciprocal block throughput is computed as the MAX between: +/// - NumMicroOps / DispatchWidth +/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource). +double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, + unsigned NumMicroOps, + ArrayRef ProcResourceUsage); +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_SUPPORT_H Index: llvm/trunk/lib/CMakeLists.txt =================================================================== --- llvm/trunk/lib/CMakeLists.txt +++ llvm/trunk/lib/CMakeLists.txt @@ -12,6 +12,7 @@ add_subdirectory(Analysis) add_subdirectory(LTO) add_subdirectory(MC) +add_subdirectory(MCA) add_subdirectory(Object) add_subdirectory(ObjectYAML) add_subdirectory(Option) Index: llvm/trunk/lib/LLVMBuild.txt =================================================================== --- llvm/trunk/lib/LLVMBuild.txt +++ llvm/trunk/lib/LLVMBuild.txt @@ -31,6 +31,7 @@ IRReader LTO MC + MCA Object BinaryFormat ObjectYAML Index: llvm/trunk/lib/MCA/CMakeLists.txt =================================================================== --- llvm/trunk/lib/MCA/CMakeLists.txt +++ llvm/trunk/lib/MCA/CMakeLists.txt @@ -0,0 +1,23 
@@ +add_llvm_library(LLVMMCA + Context.cpp + HWEventListener.cpp + HardwareUnits/HardwareUnit.cpp + HardwareUnits/LSUnit.cpp + HardwareUnits/RegisterFile.cpp + HardwareUnits/ResourceManager.cpp + HardwareUnits/RetireControlUnit.cpp + HardwareUnits/Scheduler.cpp + InstrBuilder.cpp + Instruction.cpp + Pipeline.cpp + Stages/DispatchStage.cpp + Stages/EntryStage.cpp + Stages/ExecuteStage.cpp + Stages/InstructionTables.cpp + Stages/RetireStage.cpp + Stages/Stage.cpp + Support.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/MCA + ) Index: llvm/trunk/lib/MCA/Context.cpp =================================================================== --- llvm/trunk/lib/MCA/Context.cpp +++ llvm/trunk/lib/MCA/Context.cpp @@ -0,0 +1,65 @@ +//===---------------------------- Context.cpp -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Context.h" +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/MCA/Stages/DispatchStage.h" +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Stages/ExecuteStage.h" +#include "llvm/MCA/Stages/RetireStage.h" + +namespace llvm { +namespace mca { + +std::unique_ptr +Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, + SourceMgr &SrcMgr) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Create the hardware units defining the backend. + auto RCU = llvm::make_unique(SM); + auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); + auto LSU = llvm::make_unique(SM, Opts.LoadQueueSize, + Opts.StoreQueueSize, Opts.AssumeNoAlias); + auto HWS = llvm::make_unique(SM, *LSU); + + // Create the pipeline stages. + auto Fetch = llvm::make_unique(SrcMgr); + auto Dispatch = llvm::make_unique(STI, MRI, Opts.DispatchWidth, + *RCU, *PRF); + auto Execute = llvm::make_unique(*HWS); + auto Retire = llvm::make_unique(*RCU, *PRF); + + // Pass the ownership of all the hardware units to this Context. + addHardwareUnit(std::move(RCU)); + addHardwareUnit(std::move(PRF)); + addHardwareUnit(std::move(LSU)); + addHardwareUnit(std::move(HWS)); + + // Build the pipeline. 
+ auto StagePipeline = llvm::make_unique(); + StagePipeline->appendStage(std::move(Fetch)); + StagePipeline->appendStage(std::move(Dispatch)); + StagePipeline->appendStage(std::move(Execute)); + StagePipeline->appendStage(std::move(Retire)); + return StagePipeline; +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/HWEventListener.cpp =================================================================== --- llvm/trunk/lib/MCA/HWEventListener.cpp +++ llvm/trunk/lib/MCA/HWEventListener.cpp @@ -0,0 +1,23 @@ +//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a vtable anchor for class HWEventListener. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HWEventListener.h" + +namespace llvm { +namespace mca { + +// Anchor the vtable here. +void HWEventListener::anchor() {} +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/HardwareUnits/HardwareUnit.cpp =================================================================== --- llvm/trunk/lib/MCA/HardwareUnits/HardwareUnit.cpp +++ llvm/trunk/lib/MCA/HardwareUnits/HardwareUnit.cpp @@ -0,0 +1,25 @@ +//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the anchor for the base class that describes +/// simulated hardware units. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" + +namespace llvm { +namespace mca { + +// Pin the vtable with this method. +HardwareUnit::~HardwareUnit() = default; + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp =================================================================== --- llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp +++ llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp @@ -0,0 +1,190 @@ +//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load-Store Unit for the llvm-mca tool. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/LSUnit.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, + bool AssumeNoAlias) + : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) { + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (!LQ_Size && EPI.LoadQueueID) { + const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); + LQ_Size = LdQDesc.BufferSize; + } + + if (!SQ_Size && EPI.StoreQueueID) { + const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); + SQ_Size = StQDesc.BufferSize; + } + } +} + +#ifndef NDEBUG +void LSUnit::dump() const { + dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; + dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; + dbgs() << "[LSUnit] NextLQSlotIdx = " << 
LoadQueue.size() << '\n'; + dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; +} +#endif + +void LSUnit::assignLQSlot(unsigned Index) { + assert(!isLQFull()); + assert(LoadQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot \n"); + LoadQueue.insert(Index); +} + +void LSUnit::assignSQSlot(unsigned Index) { + assert(!isSQFull()); + assert(StoreQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot \n"); + StoreQueue.insert(Index); +} + +void LSUnit::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned IsMemBarrier = Desc.HasSideEffects; + assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); + + const unsigned Index = IR.getSourceIndex(); + if (Desc.MayLoad) { + if (IsMemBarrier) + LoadBarriers.insert(Index); + assignLQSlot(Index); + } + + if (Desc.MayStore) { + if (IsMemBarrier) + StoreBarriers.insert(Index); + assignSQSlot(Index); + } +} + +LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.MayLoad && isLQFull()) + return LSUnit::LSU_LQUEUE_FULL; + if (Desc.MayStore && isSQFull()) + return LSUnit::LSU_SQUEUE_FULL; + return LSUnit::LSU_AVAILABLE; +} + +bool LSUnit::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + const unsigned Index = IR.getSourceIndex(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + assert((IsALoad || IsAStore) && "Not a memory operation!"); + assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!"); + assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!"); + + if (IsALoad && !LoadBarriers.empty()) { + unsigned LoadBarrierIndex = *LoadBarriers.begin(); + // A younger load cannot pass a older load barrier. + if (Index > LoadBarrierIndex) + return false; + // A load barrier cannot pass a older load. 
+ if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
+ return false;
+ }
+
+ if (IsAStore && !StoreBarriers.empty()) {
+ unsigned StoreBarrierIndex = *StoreBarriers.begin();
+ // A younger store cannot pass an older store barrier.
+ if (Index > StoreBarrierIndex)
+ return false;
+ // A store barrier cannot pass an older store.
+ if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
+ return false;
+ }
+
+ // A load may not pass a previous store unless flag 'NoAlias' is set.
+ // A load may pass a previous load.
+ if (NoAlias && IsALoad)
+ return true;
+
+ if (StoreQueue.size()) {
+ // A load may not pass a previous store.
+ // A store may not pass a previous store.
+ if (Index > *StoreQueue.begin())
+ return false;
+ }
+
+ // Okay, we are older than the oldest store in the queue.
+ // If there are no pending loads, then we can say for sure that this
+ // instruction is ready.
+ if (isLQEmpty())
+ return true;
+
+ // Check if there are no older loads.
+ if (Index <= *LoadQueue.begin())
+ return true;
+
+ // There is at least one younger load.
+ //
+ // A store may not pass a previous load.
+ // A load may pass a previous load. 
+ return !IsAStore; +} + +void LSUnit::onInstructionExecuted(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + const unsigned Index = IR.getSourceIndex(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + + if (IsALoad) { + if (LoadQueue.erase(Index)) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the load queue.\n"); + } + if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { + LLVM_DEBUG( + dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of load barriers.\n"); + LoadBarriers.erase(Index); + } + } + + if (IsAStore) { + if (StoreQueue.erase(Index)) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the store queue.\n"); + } + + if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { + LLVM_DEBUG( + dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of store barriers.\n"); + StoreBarriers.erase(Index); + } + } +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/HardwareUnits/RegisterFile.cpp =================================================================== --- llvm/trunk/lib/MCA/HardwareUnits/RegisterFile.cpp +++ llvm/trunk/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -0,0 +1,491 @@ +//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri, + unsigned NumRegs) + : MRI(mri), + RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}), + ZeroRegisters(mri.getNumRegs(), false) { + initialize(SM, NumRegs); +} + +void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { + // Create a default register file that "sees" all the machine registers + // declared by the target. The number of physical registers in the default + // register file is set equal to `NumRegs`. A value of zero for `NumRegs` + // means: this register file has an unbounded number of physical registers. + RegisterFiles.emplace_back(NumRegs); + if (!SM.hasExtraProcessorInfo()) + return; + + // For each user defined register file, allocate a RegisterMappingTracker + // object. The size of every register file, as well as the mapping between + // register files and register classes is specified via tablegen. + const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); + + // Skip invalid register file at index 0. + for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) { + const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; + assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!"); + + // The cost of a register definition is equivalent to the number of + // physical registers that are allocated at register renaming stage. 
+ unsigned Length = RF.NumRegisterCostEntries; + const MCRegisterCostEntry *FirstElt = + &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; + addRegisterFile(RF, ArrayRef(FirstElt, Length)); + } +} + +void RegisterFile::cycleStart() { + for (RegisterMappingTracker &RMT : RegisterFiles) + RMT.NumMoveEliminated = 0; +} + +void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF, + ArrayRef Entries) { + // A default register file is always allocated at index #0. That register file + // is mainly used to count the total number of mappings created by all + // register files at runtime. Users can limit the number of available physical + // registers in register file #0 through the command line flag + // `-register-file-size`. + unsigned RegisterFileIndex = RegisterFiles.size(); + RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle, + RF.AllowZeroMoveEliminationOnly); + + // Special case where there is no register class identifier in the set. + // An empty set of register classes means: this register file contains all + // the physical registers specified by the target. + // We optimistically assume that a register can be renamed at the cost of a + // single physical register. The constructor of RegisterFile ensures that + // a RegisterMapping exists for each logical register defined by the Target. + if (Entries.empty()) + return; + + // Now update the cost of individual registers. + for (const MCRegisterCostEntry &RCE : Entries) { + const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); + for (const MCPhysReg Reg : RC) { + RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; + IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; + if (IPC.first && IPC.first != RegisterFileIndex) { + // The only register file that is allowed to overlap is the default + // register file at index #0. The analysis is inaccurate if register + // files overlap. 
+ errs() << "warning: register " << MRI.getName(Reg) + << " defined in multiple register files."; + } + IPC = std::make_pair(RegisterFileIndex, RCE.Cost); + Entry.RenameAs = Reg; + Entry.AllowMoveElimination = RCE.AllowMoveElimination; + + // Assume the same cost for each sub-register. + for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { + RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; + if (!OtherEntry.IndexPlusCost.first && + (!OtherEntry.RenameAs || + MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { + OtherEntry.IndexPlusCost = IPC; + OtherEntry.RenameAs = Reg; + } + } + } + } +} + +void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef UsedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs += Cost; + UsedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs += Cost; + UsedPhysRegs[0] += Cost; +} + +void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef FreedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs -= Cost; + FreedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. 
+ RegisterFiles[0].NumUsedPhysRegs -= Cost; + FreedPhysRegs[0] += Cost; +} + +void RegisterFile::addRegisterWrite(WriteRef Write, + MutableArrayRef UsedPhysRegs) { + WriteState &WS = *Write.getWriteState(); + unsigned RegID = WS.getRegisterID(); + assert(RegID && "Adding an invalid register definition?"); + + LLVM_DEBUG({ + dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() + << ", " << MRI.getName(RegID) << "]\n"; + }); + + // If RenameAs is equal to RegID, then RegID is subject to register renaming + // and false dependencies on RegID are all eliminated. + + // If RenameAs references the invalid register, then we optimistically assume + // that it can be renamed. In the absence of tablegen descriptors for register + // files, RenameAs is always set to the invalid register ID. In all other + // cases, RenameAs must be either equal to RegID, or it must reference a + // super-register of RegID. + + // If RenameAs is a super-register of RegID, then a write to RegID has always + // a false dependency on RenameAs. The only exception is for when the write + // implicitly clears the upper portion of the underlying register. + // If a write clears its super-registers, then it is renamed as `RenameAs`. + bool IsWriteZero = WS.isWriteZero(); + bool IsEliminated = WS.isEliminated(); + bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated; + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + WS.setPRF(RRI.IndexPlusCost.first); + + if (RRI.RenameAs && RRI.RenameAs != RegID) { + RegID = RRI.RenameAs; + WriteRef &OtherWrite = RegisterMappings[RegID].first; + + if (!WS.clearsSuperRegisters()) { + // The processor keeps the definition of `RegID` together with register + // `RenameAs`. Since this partial write is not renamed, no physical + // register is allocated. 
+ ShouldAllocatePhysRegs = false;
+
+ WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
+ // This partial write has a false dependency on RenameAs.
+ assert(!IsEliminated && "Unexpected partial update!");
+ OtherWS->addUser(&WS);
+ }
+ }
+ }
+
+ // Update zero registers.
+ unsigned ZeroRegisterID =
+ WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
+ if (IsWriteZero) {
+ ZeroRegisters.setBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.setBit(*I);
+ } else {
+ ZeroRegisters.clearBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.clearBit(*I);
+ }
+
+ // If this move has been eliminated, then the call to tryEliminateMove
+ // should have already updated all the register mappings.
+ if (!IsEliminated) {
+ // Update the mapping for register RegID including its sub-registers.
+ RegisterMappings[RegID].first = Write;
+ RegisterMappings[RegID].second.AliasRegID = 0U;
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ // No physical registers are allocated for instructions that are optimized
+ // in hardware. For example, zero-latency data-dependency breaking
+ // instructions don't consume physical registers.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ }
+
+ if (!WS.clearsSuperRegisters())
+ return;
+
+ for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ if (!IsEliminated) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ if (IsWriteZero)
+ ZeroRegisters.setBit(*I);
+ else
+ ZeroRegisters.clearBit(*I);
+ }
+}
+
+void RegisterFile::removeRegisterWrite(
+ const WriteState &WS, MutableArrayRef FreedPhysRegs) {
+ // Early exit if this write was eliminated. 
A write eliminated at register + // renaming stage generates an alias, and it is not added to the PRF. + if (WS.isEliminated()) + return; + + unsigned RegID = WS.getRegisterID(); + + assert(RegID != 0 && "Invalidating an already invalid register?"); + assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && + "Invalidating a write of unknown cycles!"); + assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); + + bool ShouldFreePhysRegs = !WS.isWriteZero(); + unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + if (RenameAs && RenameAs != RegID) { + RegID = RenameAs; + + if (!WS.clearsSuperRegisters()) { + // Keep the definition of `RegID` together with register `RenameAs`. + ShouldFreePhysRegs = false; + } + } + + if (ShouldFreePhysRegs) + freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); + + WriteRef &WR = RegisterMappings[RegID].first; + if (WR.getWriteState() == &WS) + WR.invalidate(); + + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } +} + +bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { + const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()]; + const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()]; + + // From and To must be owned by the same PRF. + const RegisterRenamingInfo &RRIFrom = RMFrom.second; + const RegisterRenamingInfo &RRITo = RMTo.second; + unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first; + if (RegisterFileIndex != RRITo.IndexPlusCost.first) + return false; + + // We only allow move elimination for writes that update a full physical + // register. 
On X86, move elimination is possible with 32-bit general purpose + // registers because writes to those registers are not partial writes. If a + // register move is a partial write, then we conservatively assume that move + // elimination fails, since it would either trigger a partial update, or the + // issue of a merge opcode. + // + // Note that this constraint may be lifted in future. For example, we could + // make this model more flexible, and let users customize the set of registers + // (i.e. register classes) that allow move elimination. + // + // For now, we assume that there is a strong correlation between registers + // that allow move elimination, and how those same registers are renamed in + // hardware. + if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) { + // Early exit if the PRF doesn't support move elimination for this register. + if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination) + return false; + if (!WS.clearsSuperRegisters()) + return false; + } + + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + if (RMT.MaxMoveEliminatedPerCycle && + RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle) + return false; + + bool IsZeroMove = ZeroRegisters[RS.getRegisterID()]; + if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove) + return false; + + MCPhysReg FromReg = RS.getRegisterID(); + MCPhysReg ToReg = WS.getRegisterID(); + + // Construct an alias. 
+ MCPhysReg AliasReg = FromReg; + if (RRIFrom.RenameAs) + AliasReg = RRIFrom.RenameAs; + + const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second; + if (RMAlias.AliasRegID) + AliasReg = RMAlias.AliasRegID; + + if (AliasReg != ToReg) { + RegisterMappings[ToReg].second.AliasRegID = AliasReg; + for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I) + RegisterMappings[*I].second.AliasRegID = AliasReg; + } + + RMT.NumMoveEliminated++; + if (IsZeroMove) { + WS.setWriteZero(); + RS.setReadZero(); + } + WS.setEliminated(); + + return true; +} + +void RegisterFile::collectWrites(const ReadState &RS, + SmallVectorImpl &Writes) const { + unsigned RegID = RS.getRegisterID(); + assert(RegID && RegID < RegisterMappings.size()); + LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " + << MRI.getName(RegID) << '\n'); + + // Check if this is an alias. + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + if (RRI.AliasRegID) + RegID = RRI.AliasRegID; + + const WriteRef &WR = RegisterMappings[RegID].first; + if (WR.isValid()) + Writes.push_back(WR); + + // Handle potential partial register updates. + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + const WriteRef &WR = RegisterMappings[*I].first; + if (WR.isValid()) + Writes.push_back(WR); + } + + // Remove duplicate entries and resize the input vector. 
+ if (Writes.size() > 1) { + sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { + return Lhs.getWriteState() < Rhs.getWriteState(); + }); + auto It = std::unique(Writes.begin(), Writes.end()); + Writes.resize(std::distance(Writes.begin(), It)); + } + + LLVM_DEBUG({ + for (const WriteRef &WR : Writes) { + const WriteState &WS = *WR.getWriteState(); + dbgs() << "[PRF] Found a dependent use of Register " + << MRI.getName(WS.getRegisterID()) << " (defined by instruction #" + << WR.getSourceIndex() << ")\n"; + } + }); +} + +void RegisterFile::addRegisterRead(ReadState &RS, + SmallVectorImpl &Defs) const { + unsigned RegID = RS.getRegisterID(); + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + RS.setPRF(RRI.IndexPlusCost.first); + if (RS.isIndependentFromDef()) + return; + + if (ZeroRegisters[RS.getRegisterID()]) + RS.setReadZero(); + collectWrites(RS, Defs); + RS.setDependentWrites(Defs.size()); +} + +unsigned RegisterFile::isAvailable(ArrayRef Regs) const { + SmallVector NumPhysRegs(getNumRegisterFiles()); + + // Find how many new mappings must be created for each register file. + for (const unsigned RegID : Regs) { + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; + if (Entry.first) + NumPhysRegs[Entry.first] += Entry.second; + NumPhysRegs[0] += Entry.second; + } + + unsigned Response = 0; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + unsigned NumRegs = NumPhysRegs[I]; + if (!NumRegs) + continue; + + const RegisterMappingTracker &RMT = RegisterFiles[I]; + if (!RMT.NumPhysRegs) { + // The register file has an unbounded number of microarchitectural + // registers. + continue; + } + + if (RMT.NumPhysRegs < NumRegs) { + // The current register file is too small. This may occur if the number of + // microarchitectural registers in register file #0 was changed by the + // users via flag -reg-file-size. 
Alternatively, the scheduling model + // specified a too small number of registers for this register file. + LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); + + // FIXME: Normalize the instruction register count to match the + // NumPhysRegs value. This is a highly unusual case, and is not expected + // to occur. This normalization is hiding an inconsistency in either the + // scheduling model or in the value that the user might have specified + // for NumPhysRegs. + NumRegs = RMT.NumPhysRegs; + } + + if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) + Response |= (1U << I); + } + + return Response; +} + +#ifndef NDEBUG +void RegisterFile::dump() const { + for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { + const RegisterMapping &RM = RegisterMappings[I]; + const RegisterRenamingInfo &RRI = RM.second; + if (ZeroRegisters[I]) { + dbgs() << MRI.getName(I) << ", " << I + << ", PRF=" << RRI.IndexPlusCost.first + << ", Cost=" << RRI.IndexPlusCost.second + << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I] + << ","; + RM.first.dump(); + dbgs() << '\n'; + } + } + + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + dbgs() << "Register File #" << I; + const RegisterMappingTracker &RMT = RegisterFiles[I]; + dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs + << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; + } +} +#endif + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/HardwareUnits/ResourceManager.cpp =================================================================== --- llvm/trunk/lib/MCA/HardwareUnits/ResourceManager.cpp +++ llvm/trunk/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -0,0 +1,326 @@ +//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/ResourceManager.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" +ResourceStrategy::~ResourceStrategy() = default; + +uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { + // This method assumes that ReadyMask cannot be zero. + uint64_t CandidateMask = ReadyMask & NextInSequenceMask; + if (CandidateMask) { + CandidateMask = PowerOf2Floor(CandidateMask); + NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); + return CandidateMask; + } + + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; + CandidateMask = ReadyMask & NextInSequenceMask; + + if (CandidateMask) { + CandidateMask = PowerOf2Floor(CandidateMask); + NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); + return CandidateMask; + } + + NextInSequenceMask = ResourceUnitMask; + CandidateMask = PowerOf2Floor(ReadyMask & NextInSequenceMask); + NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); + return CandidateMask; +} + +void DefaultResourceStrategy::used(uint64_t Mask) { + if (Mask > NextInSequenceMask) { + RemovedFromNextInSequence |= Mask; + return; + } + + NextInSequenceMask &= (~Mask); + if (NextInSequenceMask) + return; + + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; +} + +ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, + uint64_t Mask) + : ProcResourceDescIndex(Index), ResourceMask(Mask), + BufferSize(Desc.BufferSize), 
IsAGroup(countPopulation(ResourceMask)>1) { + if (IsAGroup) + ResourceSizeMask = ResourceMask ^ PowerOf2Floor(ResourceMask); + else + ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; + ReadyMask = ResourceSizeMask; + AvailableSlots = BufferSize == -1 ? 0U : static_cast(BufferSize); + Unavailable = false; +} + +bool ResourceState::isReady(unsigned NumUnits) const { + return (!isReserved() || isADispatchHazard()) && + countPopulation(ReadyMask) >= NumUnits; +} + +ResourceStateEvent ResourceState::isBufferAvailable() const { + if (isADispatchHazard() && isReserved()) + return RS_RESERVED; + if (!isBuffered() || AvailableSlots) + return RS_BUFFER_AVAILABLE; + return RS_BUFFER_UNAVAILABLE; +} + +#ifndef NDEBUG +void ResourceState::dump() const { + dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask + << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize + << ", AvailableSlots=" << AvailableSlots + << ", Reserved=" << Unavailable << '\n'; +} +#endif + +static unsigned getResourceStateIndex(uint64_t Mask) { + return std::numeric_limits::digits - countLeadingZeros(Mask); +} + +static std::unique_ptr +getStrategyFor(const ResourceState &RS) { + if (RS.isAResourceGroup() || RS.getNumUnits() > 1) + return llvm::make_unique(RS.getReadyMask()); + return std::unique_ptr(nullptr); +} + +ResourceManager::ResourceManager(const MCSchedModel &SM) { + computeProcResourceMasks(SM, ProcResID2Mask); + Resources.resize(SM.getNumProcResourceKinds()); + Strategies.resize(SM.getNumProcResourceKinds()); + + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + uint64_t Mask = ProcResID2Mask[I]; + unsigned Index = getResourceStateIndex(Mask); + Resources[Index] = + llvm::make_unique(*SM.getProcResource(I), I, Mask); + Strategies[Index] = getStrategyFor(*Resources[Index]); + } +} + +void ResourceManager::setCustomStrategyImpl(std::unique_ptr S, + uint64_t ResourceMask) { + unsigned Index = getResourceStateIndex(ResourceMask); + assert(Index < 
Resources.size() && "Invalid processor resource index!"); + assert(S && "Unexpected null strategy in input!"); + Strategies[Index] = std::move(S); +} + +unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const { + return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); +} + +unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { + return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); +} + +// Returns the actual resource consumed by this Use. +// First, is the primary resource ID. +// Second, is the specific sub-resource ID. +ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { + unsigned Index = getResourceStateIndex(ResourceID); + ResourceState &RS = *Resources[Index]; + assert(RS.isReady() && "No available units to select!"); + + // Special case where RS is not a group, and it only declares a single + // resource unit. + if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) + return std::make_pair(ResourceID, RS.getReadyMask()); + + uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); + if (RS.isAResourceGroup()) + return selectPipe(SubResourceID); + return std::make_pair(ResourceID, SubResourceID); +} + +void ResourceManager::use(const ResourceRef &RR) { + // Mark the sub-resource referenced by RR as used. + unsigned RSID = getResourceStateIndex(RR.first); + ResourceState &RS = *Resources[RSID]; + RS.markSubResourceAsUsed(RR.second); + // Remember to update the resource strategy for non-group resources with + // multiple units. + if (RS.getNumUnits() > 1) + Strategies[RSID]->used(RR.second); + + // If there are still available units in RR.first, + // then we are done. + if (RS.isReady()) + return; + + // Notify to other resources that RR.first is no longer available. 
+ for (std::unique_ptr &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) { + unsigned Index = getResourceStateIndex(Current.getResourceMask()); + Current.markSubResourceAsUsed(RR.first); + Strategies[Index]->used(RR.first); + } + } +} + +void ResourceManager::release(const ResourceRef &RR) { + ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; + bool WasFullyUsed = !RS.isReady(); + RS.releaseSubResource(RR.second); + if (!WasFullyUsed) + return; + + for (std::unique_ptr &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) + Current.releaseSubResource(RR.first); + } +} + +ResourceStateEvent +ResourceManager::canBeDispatched(ArrayRef Buffers) const { + ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; + for (uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + Result = RS.isBufferAvailable(); + if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) + break; + } + return Result; +} + +void ResourceManager::reserveBuffers(ArrayRef Buffers) { + for (const uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); + RS.reserveBuffer(); + + if (RS.isADispatchHazard()) { + assert(!RS.isReserved()); + RS.setReserved(); + } + } +} + +void ResourceManager::releaseBuffers(ArrayRef Buffers) { + for (const uint64_t R : Buffers) + Resources[getResourceStateIndex(R)]->releaseBuffer(); +} + +bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { + return all_of( + Desc.Resources, [&](const std::pair &E) { + unsigned NumUnits = E.second.isReserved() ? 
0U : E.second.NumUnits; + unsigned Index = getResourceStateIndex(E.first); + return Resources[Index]->isReady(NumUnits); + }); +} + +// Returns true if all resources are in-order, and there is at least one +// resource which is a dispatch hazard (BufferSize = 0). +bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { + if (!canBeIssued(Desc)) + return false; + bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { + unsigned Index = getResourceStateIndex(BufferMask); + const ResourceState &Resource = *Resources[Index]; + return Resource.isInOrder() || Resource.isADispatchHazard(); + }); + if (!AllInOrderResources) + return false; + + return any_of(Desc.Buffers, [&](uint64_t BufferMask) { + return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); + }); +} + +void ResourceManager::issueInstruction( + const InstrDesc &Desc, + SmallVectorImpl> &Pipes) { + for (const std::pair &R : Desc.Resources) { + const CycleSegment &CS = R.second.CS; + if (!CS.size()) { + releaseResource(R.first); + continue; + } + + assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); + if (!R.second.isReserved()) { + ResourceRef Pipe = selectPipe(R.first); + use(Pipe); + BusyResources[Pipe] += CS.size(); + // Replace the resource mask with a valid processor resource index. + const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; + Pipe.first = RS.getProcResourceID(); + Pipes.emplace_back(std::pair( + Pipe, ResourceCycles(CS.size()))); + } else { + assert((countPopulation(R.first) > 1) && "Expected a group!"); + // Mark this group as reserved. + assert(R.second.isReserved()); + reserveResource(R.first); + BusyResources[ResourceRef(R.first, R.first)] += CS.size(); + } + } +} + +void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { + for (std::pair &BR : BusyResources) { + if (BR.second) + BR.second--; + if (!BR.second) { + // Release this resource. 
+ const ResourceRef &RR = BR.first; + + if (countPopulation(RR.first) == 1) + release(RR); + + releaseResource(RR.first); + ResourcesFreed.push_back(RR); + } + } + + for (const ResourceRef &RF : ResourcesFreed) + BusyResources.erase(RF); +} + +void ResourceManager::reserveResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + assert(!Resource.isReserved()); + Resource.setReserved(); +} + +void ResourceManager::releaseResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + Resource.clearReserved(); +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/HardwareUnits/RetireControlUnit.cpp =================================================================== --- llvm/trunk/lib/MCA/HardwareUnits/RetireControlUnit.cpp +++ llvm/trunk/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -0,0 +1,88 @@ +//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), + AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + // Check if the scheduling model provides extra information about the machine + // processor. If so, then use that information to set the reorder buffer size + // and the maximum number of instructions retired per cycle. 
+ if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + AvailableSlots = EPI.ReorderBufferSize; + MaxRetirePerCycle = EPI.MaxRetirePerCycle; + } + + assert(AvailableSlots && "Invalid reorder buffer size!"); + Queue.resize(AvailableSlots); +} + +// Reserves a number of slots, and returns a new token. +unsigned RetireControlUnit::reserveSlot(const InstRef &IR, + unsigned NumMicroOps) { + assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); + unsigned NormalizedQuantity = + std::min(NumMicroOps, static_cast(Queue.size())); + // Zero latency instructions may have zero uOps. Artificially bump this + // value to 1. Although zero latency instructions don't consume scheduler + // resources, they still consume one slot in the retire queue. + NormalizedQuantity = std::max(NormalizedQuantity, 1U); + unsigned TokenID = NextAvailableSlotIdx; + Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; + NextAvailableSlotIdx += NormalizedQuantity; + NextAvailableSlotIdx %= Queue.size(); + AvailableSlots -= NormalizedQuantity; + return TokenID; +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { + return Queue[CurrentInstructionSlotIdx]; +} + +void RetireControlUnit::consumeCurrentToken() { + RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; + assert(Current.NumSlots && "Reserved zero slots?"); + assert(Current.IR && "Invalid RUToken in the RCU queue."); + Current.IR.getInstruction()->retire(); + + // Update the slot index to be the next item in the circular queue. 
+ CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx %= Queue.size(); + AvailableSlots += Current.NumSlots; +} + +void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { + assert(Queue.size() > TokenID); + assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); + Queue[TokenID].Executed = true; +} + +#ifndef NDEBUG +void RetireControlUnit::dump() const { + dbgs() << "Retire Unit: { Total Slots=" << Queue.size() + << ", Available Slots=" << AvailableSlots << " }\n"; +} +#endif + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp =================================================================== --- llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp +++ llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp @@ -0,0 +1,245 @@ +//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A scheduler for processor resource units and processor resource groups. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +void Scheduler::initializeStrategy(std::unique_ptr S) { + // Ensure we have a valid (non-null) strategy object. + Strategy = S ? std::move(S) : llvm::make_unique(); +} + +// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
+SchedulerStrategy::~SchedulerStrategy() = default; +DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; + +#ifndef NDEBUG +void Scheduler::dump() const { + dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; + dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; + dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; + Resources->dump(); +} +#endif + +Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + + switch (Resources->canBeDispatched(Desc.Buffers)) { + case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: + return Scheduler::SC_BUFFERS_FULL; + case ResourceStateEvent::RS_RESERVED: + return Scheduler::SC_DISPATCH_GROUP_STALL; + case ResourceStateEvent::RS_BUFFER_AVAILABLE: + break; + } + + // Give lower priority to LSUnit stall events. + switch (LSU.isAvailable(IR)) { + case LSUnit::LSU_LQUEUE_FULL: + return Scheduler::SC_LOAD_QUEUE_FULL; + case LSUnit::LSU_SQUEUE_FULL: + return Scheduler::SC_STORE_QUEUE_FULL; + case LSUnit::LSU_AVAILABLE: + return Scheduler::SC_AVAILABLE; + } + + llvm_unreachable("Don't know how to process this LSU state result!"); +} + +void Scheduler::issueInstructionImpl( + InstRef &IR, + SmallVectorImpl> &UsedResources) { + Instruction *IS = IR.getInstruction(); + const InstrDesc &D = IS->getDesc(); + + // Issue the instruction and collect all the consumed resources + // into a vector. That vector is then used to notify the listener. + Resources->issueInstruction(D, UsedResources); + + // Notify the instruction that it started executing. + // This updates the internal state of each write. + IS->execute(); + + if (IS->isExecuting()) + IssuedSet.emplace_back(IR); + else if (IS->isExecuted()) + LSU.onInstructionExecuted(IR); +} + +// Release the buffered resources and issue the instruction. 
+void Scheduler::issueInstruction( + InstRef &IR, + SmallVectorImpl> &UsedResources, + SmallVectorImpl &ReadyInstructions) { + const Instruction &Inst = *IR.getInstruction(); + bool HasDependentUsers = Inst.hasDependentUsers(); + + Resources->releaseBuffers(Inst.getDesc().Buffers); + issueInstructionImpl(IR, UsedResources); + // Instructions that have been issued during this cycle might have unblocked + // other dependent instructions. Dependent instructions may be issued during + // this same cycle if operands have ReadAdvance entries. Promote those + // instructions to the ReadySet and notify the caller that those are ready. + if (HasDependentUsers) + promoteToReadySet(ReadyInstructions); +} + +void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { + // Scan the set of waiting instructions and promote them to the + // ready queue if operands are all ready. + unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + + // Check if this instruction is now ready. In case, force + // a transition in state using method 'update()'. + Instruction &IS = *IR.getInstruction(); + if (!IS.isReady()) + IS.update(); + + // Check if there are still unsolved data dependencies. + if (!isReady(IR)) { + ++I; + continue; + } + + Ready.emplace_back(IR); + ReadySet.emplace_back(IR); + + IR.invalidate(); + ++RemovedElements; + std::iter_swap(I, E - RemovedElements); + } + + WaitSet.resize(WaitSet.size() - RemovedElements); +} + +InstRef Scheduler::select() { + unsigned QueueIndex = ReadySet.size(); + for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { + const InstRef &IR = ReadySet[I]; + if (QueueIndex == ReadySet.size() || + Strategy->compare(IR, ReadySet[QueueIndex])) { + const InstrDesc &D = IR.getInstruction()->getDesc(); + if (Resources->canBeIssued(D)) + QueueIndex = I; + } + } + + if (QueueIndex == ReadySet.size()) + return InstRef(); + + // We found an instruction to issue. 
+ InstRef IR = ReadySet[QueueIndex]; + std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); + ReadySet.pop_back(); + return IR; +} + +void Scheduler::updateIssuedSet(SmallVectorImpl &Executed) { + unsigned RemovedElements = 0; + for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + Instruction &IS = *IR.getInstruction(); + if (!IS.isExecuted()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " is still executing.\n"); + ++I; + continue; + } + + // Instruction IR has completed execution. + LSU.onInstructionExecuted(IR); + Executed.emplace_back(IR); + ++RemovedElements; + IR.invalidate(); + std::iter_swap(I, E - RemovedElements); + } + + IssuedSet.resize(IssuedSet.size() - RemovedElements); +} + +void Scheduler::cycleEvent(SmallVectorImpl &Freed, + SmallVectorImpl &Executed, + SmallVectorImpl &Ready) { + // Release consumed resources. + Resources->cycleEvent(Freed); + + // Propagate the cycle event to the 'Issued' and 'Wait' sets. + for (InstRef &IR : IssuedSet) + IR.getInstruction()->cycleEvent(); + + updateIssuedSet(Executed); + + for (InstRef &IR : WaitSet) + IR.getInstruction()->cycleEvent(); + + promoteToReadySet(Ready); +} + +bool Scheduler::mustIssueImmediately(const InstRef &IR) const { + // Instructions that use an in-order dispatch/issue processor resource must be + // issued immediately to the pipeline(s). Any other in-order buffered + // resources (i.e. BufferSize=1) is consumed. + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); +} + +void Scheduler::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + Resources->reserveBuffers(Desc.Buffers); + + // If necessary, reserve queue entries in the load-store unit (LSU). 
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore; + if (IsMemOp) + LSU.dispatch(IR); + + if (!isReady(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); + WaitSet.push_back(IR); + return; + } + + // Don't add a zero-latency instruction to the Ready queue. + // A zero-latency instruction doesn't consume any scheduler resources. That is + // because it doesn't need to be executed, and it is often removed at register + // renaming stage. For example, register-register moves are often optimized at + // register renaming stage by simply updating register aliases. On some + // targets, zero-idiom instructions (for example: a xor that clears the value + // of a register) are treated specially, and are often eliminated at register + // renaming stage. + if (!mustIssueImmediately(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); + ReadySet.push_back(IR); + } +} + +bool Scheduler::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsMemOp = Desc.MayLoad || Desc.MayStore; + return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR)); +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/InstrBuilder.cpp =================================================================== --- llvm/trunk/lib/MCA/InstrBuilder.cpp +++ llvm/trunk/lib/MCA/InstrBuilder.cpp @@ -0,0 +1,675 @@ +//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstrBuilder interface. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/InstrBuilder.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, + const llvm::MCInstrInfo &mcii, + const llvm::MCRegisterInfo &mri, + const llvm::MCInstrAnalysis &mcia) + : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), + FirstReturnInst(true) { + computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); +} + +static void initializeUsedResources(InstrDesc &ID, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI, + ArrayRef ProcResourceMasks) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Populate resources consumed. + using ResourcePlusCycles = std::pair; + std::vector Worklist; + + // Track cycles contributed by resources that are in a "Super" relationship. + // This is required if we want to correctly match the behavior of method + // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set + // of "consumed" processor resources and resource cycles, the logic in + // ExpandProcResource() doesn't update the number of resource cycles + // contributed by a "Super" resource to a group. + // We need to take this into account when we find that a processor resource is + // part of a group, and it is also used as the "Super" of other resources. + // This map stores the number of cycles contributed by sub-resources that are + // part of a "Super" resource. The key value is the "Super" resource mask ID. 
+ DenseMap SuperResources; + + unsigned NumProcResources = SM.getNumProcResourceKinds(); + APInt Buffers(NumProcResources, 0); + + for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { + const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; + const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); + uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; + if (PR.BufferSize != -1) + Buffers.setBit(PRE->ProcResourceIdx); + CycleSegment RCy(0, PRE->Cycles, false); + Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); + if (PR.SuperIdx) { + uint64_t Super = ProcResourceMasks[PR.SuperIdx]; + SuperResources[Super] += PRE->Cycles; + } + } + + // Sort elements by mask popcount, so that we prioritize resource units over + // resource groups, and smaller groups over larger groups. + sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { + unsigned popcntA = countPopulation(A.first); + unsigned popcntB = countPopulation(B.first); + if (popcntA < popcntB) + return true; + if (popcntA > popcntB) + return false; + return A.first < B.first; + }); + + uint64_t UsedResourceUnits = 0; + + // Remove cycles contributed by smaller resources. + for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { + ResourcePlusCycles &A = Worklist[I]; + if (!A.second.size()) { + A.second.NumUnits = 0; + A.second.setReserved(); + ID.Resources.emplace_back(A); + continue; + } + + ID.Resources.emplace_back(A); + uint64_t NormalizedMask = A.first; + if (countPopulation(A.first) == 1) { + UsedResourceUnits |= A.first; + } else { + // Remove the leading 1 from the resource group mask. 
+ NormalizedMask ^= PowerOf2Floor(NormalizedMask); + } + + for (unsigned J = I + 1; J < E; ++J) { + ResourcePlusCycles &B = Worklist[J]; + if ((NormalizedMask & B.first) == NormalizedMask) { + B.second.CS.subtract(A.second.size() - SuperResources[A.first]); + if (countPopulation(B.first) > 1) + B.second.NumUnits++; + } + } + } + + // A SchedWrite may specify a number of cycles in which a resource group + // is reserved. For example (on target x86; cpu Haswell): + // + // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { + // let ResourceCycles = [2, 2, 3]; + // } + // + // This means: + // Resource units HWPort0 and HWPort1 are both used for 2cy. + // Resource group HWPort01 is the union of HWPort0 and HWPort1. + // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 + // will not be usable for 2 entire cycles from instruction issue. + // + // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency + // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an + // extra delay on top of the 2 cycles latency. + // During those extra cycles, HWPort01 is not usable by other instructions. + for (ResourcePlusCycles &RPC : ID.Resources) { + if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { + // Remove the leading 1 from the resource group mask. + uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); + if ((Mask & UsedResourceUnits) == Mask) + RPC.second.setReserved(); + } + } + + // Identify extra buffers that are consumed through super resources. + for (const std::pair &SR : SuperResources) { + for (unsigned I = 1, E = NumProcResources; I < E; ++I) { + const MCProcResourceDesc &PR = *SM.getProcResource(I); + if (PR.BufferSize == -1) + continue; + + uint64_t Mask = ProcResourceMasks[I]; + if (Mask != SR.first && ((Mask & SR.first) == SR.first)) + Buffers.setBit(I); + } + } + + // Now set the buffers. 
+ if (unsigned NumBuffers = Buffers.countPopulation()) { + ID.Buffers.resize(NumBuffers); + for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { + if (Buffers[I]) { + --NumBuffers; + ID.Buffers[NumBuffers] = ProcResourceMasks[I]; + } + } + } + + LLVM_DEBUG({ + for (const std::pair &R : ID.Resources) + dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; + for (const uint64_t R : ID.Buffers) + dbgs() << "\t\tBuffer Mask=" << R << '\n'; + }); +} + +static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI) { + if (MCDesc.isCall()) { + // We cannot estimate how long this call will take. + // Artificially set an arbitrarily high latency (100cy). + ID.MaxLatency = 100U; + return; + } + + int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // If latency is unknown, then conservatively assume a MaxLatency of 100cy. + ID.MaxLatency = Latency < 0 ? 100U : static_cast(Latency); +} + +static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { + // Count register definitions, and skip non register operands in the process. + unsigned I, E; + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { + const MCOperand &Op = MCI.getOperand(I); + if (Op.isReg()) + --NumExplicitDefs; + } + + if (NumExplicitDefs) { + return make_error>( + "Expected more register operand definitions.", MCI); + } + + if (MCDesc.hasOptionalDef()) { + // Always assume that the optional definition is the last operand. + const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); + if (I == MCI.getNumOperands() || !Op.isReg()) { + std::string Message = + "expected a register operand for an optional definition. 
Instruction " + "has not been correctly analyzed."; + return make_error>(Message, MCI); + } + } + + return ErrorSuccess(); +} + +void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + + // Assumptions made by this algorithm: + // 1. The number of explicit and implicit register definitions in a MCInst + // matches the number of explicit and implicit definitions according to + // the opcode descriptor (MCInstrDesc). + // 2. Uses start at index #(MCDesc.getNumDefs()). + // 3. There can only be a single optional register definition, an it is + // always the last operand of the sequence (excluding extra operands + // contributed by variadic opcodes). + // + // These assumptions work quite well for most out-of-order in-tree targets + // like x86. This is mainly because the vast majority of instructions is + // expanded to MCInst using a straightforward lowering logic that preserves + // the ordering of the operands. + // + // About assumption 1. + // The algorithm allows non-register operands between register operand + // definitions. This helps to handle some special ARM instructions with + // implicit operand increment (-mtriple=armv7): + // + // vld1.32 {d18, d19}, [r1]! @ + // @ (!!) + // @ + // @ + // @ + // @ > + // + // MCDesc reports: + // 6 explicit operands. + // 1 optional definition + // 2 explicit definitions (!!) + // + // The presence of an 'Imm' operand between the two register definitions + // breaks the assumption that "register definitions are always at the + // beginning of the operand sequence". + // + // To workaround this issue, this algorithm ignores (i.e. skips) any + // non-register operands between register definitions. The optional + // definition is still at index #(NumOperands-1). + // + // According to assumption 2. 
register reads start at #(NumExplicitDefs-1). + // That means, register R1 from the example is both read and written. + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); + unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; + unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; + if (MCDesc.hasOptionalDef()) + TotalDefs++; + + unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); + ID.Writes.resize(TotalDefs + NumVariadicOps); + // Iterate over the operands list, and skip non-register operands. + // The first NumExplictDefs register operands are expected to be register + // definitions. + unsigned CurrentDef = 0; + unsigned i = 0; + for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { + const MCOperand &Op = MCI.getOperand(i); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = i; + if (CurrentDef < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. 
+ Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + Write.IsOptionalDef = false; + LLVM_DEBUG({ + dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + CurrentDef++; + } + + assert(CurrentDef == NumExplicitDefs && + "Expected more register operand definitions."); + for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { + unsigned Index = NumExplicitDefs + CurrentDef; + WriteDescriptor &Write = ID.Writes[Index]; + Write.OpIndex = ~CurrentDef; + Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; + if (Index < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, Index); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + + Write.IsOptionalDef = false; + assert(Write.RegisterID != 0 && "Expected a valid phys register!"); + LLVM_DEBUG({ + dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex + << ", PhysReg=" << MRI.getName(Write.RegisterID) + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (MCDesc.hasOptionalDef()) { + WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; + Write.OpIndex = MCDesc.getNumOperands() - 1; + // Assign a default latency for this write. 
+ Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = true; + LLVM_DEBUG({ + dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (!NumVariadicOps) + return; + + // FIXME: if an instruction opcode is flagged 'mayStore', and it has no + // "unmodeledSideEffects', then this logic optimistically assumes that any + // extra register operands in the variadic sequence is not a register + // definition. + // + // Otherwise, we conservatively assume that any register operand from the + // variadic sequence is both a register read and a register write. + bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() && + !MCDesc.hasUnmodeledSideEffects(); + CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); + for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); + I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = OpIndex; + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = false; + ++CurrentDef; + LLVM_DEBUG({ + dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + ID.Writes.resize(CurrentDef); +} + +void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); + unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); + // Remove the optional definition. 
+ if (MCDesc.hasOptionalDef()) + --NumExplicitUses; + unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); + unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; + ID.Reads.resize(TotalUses); + unsigned CurrentUse = 0; + for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; + ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = OpIndex; + Read.UseIndex = I; + Read.SchedClassID = SchedClassID; + ++CurrentUse; + LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + // For the purpose of ReadAdvance, implicit uses come directly after explicit + // uses. The "UseIndex" must be updated according to that implicit layout. + for (unsigned I = 0; I < NumImplicitUses; ++I) { + ReadDescriptor &Read = ID.Reads[CurrentUse + I]; + Read.OpIndex = ~I; + Read.UseIndex = NumExplicitUses + I; + Read.RegisterID = MCDesc.getImplicitUses()[I]; + Read.SchedClassID = SchedClassID; + LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex + << ", UseIndex=" << Read.UseIndex << ", RegisterID=" + << MRI.getName(Read.RegisterID) << '\n'); + } + + CurrentUse += NumImplicitUses; + + // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no + // "unmodeledSideEffects", then this logic optimistically assumes that any + // extra register operands in the variadic sequence are not register + // definition. 
+ + bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && + !MCDesc.hasUnmodeledSideEffects(); + for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); + I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = OpIndex; + Read.UseIndex = NumExplicitUses + NumImplicitUses + I; + Read.SchedClassID = SchedClassID; + ++CurrentUse; + LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + ID.Reads.resize(CurrentUse); +} + +Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, + const MCInst &MCI) const { + if (ID.NumMicroOps != 0) + return ErrorSuccess(); + + bool UsesMemory = ID.MayLoad || ID.MayStore; + bool UsesBuffers = !ID.Buffers.empty(); + bool UsesResources = !ID.Resources.empty(); + if (!UsesMemory && !UsesBuffers && !UsesResources) + return ErrorSuccess(); + + StringRef Message; + if (UsesMemory) { + Message = "found an inconsistent instruction that decodes " + "into zero opcodes and that consumes load/store " + "unit resources."; + } else { + Message = "found an inconsistent instruction that decodes " + "to zero opcodes and that consumes scheduler " + "resources."; + } + + return make_error>(Message, MCI); +} + +Expected +InstrBuilder::createInstrDescImpl(const MCInst &MCI) { + assert(STI.getSchedModel().hasInstrSchedModel() && + "Itineraries are not yet supported!"); + + // Obtain the instruction descriptor from the opcode. + unsigned short Opcode = MCI.getOpcode(); + const MCInstrDesc &MCDesc = MCII.get(Opcode); + const MCSchedModel &SM = STI.getSchedModel(); + + // Then obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); + + // Try to solve variant scheduling classes. 
+ if (IsVariant) { + unsigned CPUID = SM.getProcessorID(); + while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); + + if (!SchedClassID) { + return make_error>( + "unable to resolve scheduling class for write variant.", MCI); + } + } + + // Check if this instruction is supported. Otherwise, report an error. + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { + return make_error>( + "found an unsupported instruction in the input assembly sequence.", + MCI); + } + + // Create a new empty descriptor. + std::unique_ptr ID = llvm::make_unique(); + ID->NumMicroOps = SCDesc.NumMicroOps; + + if (MCDesc.isCall() && FirstCallInst) { + // We don't correctly model calls. + WithColor::warning() << "found a call in the input assembly sequence.\n"; + WithColor::note() << "call instructions are not correctly modeled. " + << "Assume a latency of 100cy.\n"; + FirstCallInst = false; + } + + if (MCDesc.isReturn() && FirstReturnInst) { + WithColor::warning() << "found a return instruction in the input" + << " assembly sequence.\n"; + WithColor::note() << "program counter updates are ignored.\n"; + FirstReturnInst = false; + } + + ID->MayLoad = MCDesc.mayLoad(); + ID->MayStore = MCDesc.mayStore(); + ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); + + initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); + computeMaxLatency(*ID, MCDesc, SCDesc, STI); + + if (Error Err = verifyOperands(MCDesc, MCI)) + return std::move(Err); + + populateWrites(*ID, MCI, SchedClassID); + populateReads(*ID, MCI, SchedClassID); + + LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); + LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); + + // Sanity check on the instruction descriptor. 
+ if (Error Err = verifyInstrDesc(*ID, MCI)) + return std::move(Err); + + // Now add the new descriptor. + SchedClassID = MCDesc.getSchedClass(); + bool IsVariadic = MCDesc.isVariadic(); + if (!IsVariadic && !IsVariant) { + Descriptors[MCI.getOpcode()] = std::move(ID); + return *Descriptors[MCI.getOpcode()]; + } + + VariantDescriptors[&MCI] = std::move(ID); + return *VariantDescriptors[&MCI]; +} + +Expected +InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { + if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) + return *Descriptors[MCI.getOpcode()]; + + if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) + return *VariantDescriptors[&MCI]; + + return createInstrDescImpl(MCI); +} + +Expected> +InstrBuilder::createInstruction(const MCInst &MCI) { + Expected DescOrErr = getOrCreateInstrDesc(MCI); + if (!DescOrErr) + return DescOrErr.takeError(); + const InstrDesc &D = *DescOrErr; + std::unique_ptr NewIS = llvm::make_unique(D); + + // Check if this is a dependency breaking instruction. + APInt Mask; + + unsigned ProcID = STI.getSchedModel().getProcessorID(); + bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID); + bool IsDepBreaking = + IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID); + if (MCIA.isOptimizableRegisterMove(MCI, ProcID)) + NewIS->setOptimizableMove(); + + // Initialize Reads first. + for (const ReadDescriptor &RD : D.Reads) { + int RegID = -1; + if (!RD.isImplicitRead()) { + // explicit read. + const MCOperand &Op = MCI.getOperand(RD.OpIndex); + // Skip non-register operands. + if (!Op.isReg()) + continue; + RegID = Op.getReg(); + } else { + // Implicit read. + RegID = RD.RegisterID; + } + + // Skip invalid register operands. + if (!RegID) + continue; + + // Okay, this is a register operand. Create a ReadState for it. 
+ assert(RegID > 0 && "Invalid register ID found!"); + NewIS->getUses().emplace_back(RD, RegID); + ReadState &RS = NewIS->getUses().back(); + + if (IsDepBreaking) { + // A mask of all zeroes means: explicit input operands are not + // independent. + if (Mask.isNullValue()) { + if (!RD.isImplicitRead()) + RS.setIndependentFromDef(); + } else { + // Check if this register operand is independent according to `Mask`. + // Note that Mask may not have enough bits to describe all explicit and + // implicit input operands. If this register operand doesn't have a + // corresponding bit in Mask, then conservatively assume that it is + // dependent. + if (Mask.getBitWidth() > RD.UseIndex) { + // Okay. This map describe register use `RD.UseIndex`. + if (Mask[RD.UseIndex]) + RS.setIndependentFromDef(); + } + } + } + } + + // Early exit if there are no writes. + if (D.Writes.empty()) + return std::move(NewIS); + + // Track register writes that implicitly clear the upper portion of the + // underlying super-registers using an APInt. + APInt WriteMask(D.Writes.size(), 0); + + // Now query the MCInstrAnalysis object to obtain information about which + // register writes implicitly clear the upper portion of a super-register. + MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); + + // Initialize writes. + unsigned WriteIndex = 0; + for (const WriteDescriptor &WD : D.Writes) { + unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); + // Check if this is a optional definition that references NoReg. 
+ if (WD.IsOptionalDef && !RegID) { + ++WriteIndex; + continue; + } + + assert(RegID && "Expected a valid register ID!"); + NewIS->getDefs().emplace_back(WD, RegID, + /* ClearsSuperRegs */ WriteMask[WriteIndex], + /* WritesZero */ IsZeroIdiom); + ++WriteIndex; + } + + return std::move(NewIS); +} +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/Instruction.cpp =================================================================== --- llvm/trunk/lib/MCA/Instruction.cpp +++ llvm/trunk/lib/MCA/Instruction.cpp @@ -0,0 +1,205 @@ +//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines abstractions used by the Pipeline to model register reads, +// register writes and instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +void ReadState::writeStartEvent(unsigned Cycles) { + assert(DependentWrites); + assert(CyclesLeft == UNKNOWN_CYCLES); + + // This read may be dependent on more than one write. This typically occurs + // when a definition is the result of multiple writes where at least one + // write does a partial register update. + // The HW is forced to do some extra bookkeeping to track of all the + // dependent writes, and implement a merging scheme for the partial writes. 
+ --DependentWrites; + TotalCycles = std::max(TotalCycles, Cycles); + + if (!DependentWrites) { + CyclesLeft = TotalCycles; + IsReady = !CyclesLeft; + } +} + +void WriteState::onInstructionIssued() { + assert(CyclesLeft == UNKNOWN_CYCLES); + // Update the number of cycles left based on the WriteDescriptor info. + CyclesLeft = getLatency(); + + // Now that the time left before write-back is known, notify + // all the users. + for (const std::pair &User : Users) { + ReadState *RS = User.first; + unsigned ReadCycles = std::max(0, CyclesLeft - User.second); + RS->writeStartEvent(ReadCycles); + } + + // Notify any writes that are in a false dependency with this write. + if (PartialWrite) + PartialWrite->writeStartEvent(CyclesLeft); +} + +void WriteState::addUser(ReadState *User, int ReadAdvance) { + // If CyclesLeft is different than -1, then we don't need to + // update the list of users. We can just notify the user with + // the actual number of cycles left (which may be zero). + if (CyclesLeft != UNKNOWN_CYCLES) { + unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); + User->writeStartEvent(ReadCycles); + return; + } + + if (llvm::find_if(Users, [&User](const std::pair &Use) { + return Use.first == User; + }) == Users.end()) { + Users.emplace_back(User, ReadAdvance); + } +} + +void WriteState::addUser(WriteState *User) { + if (CyclesLeft != UNKNOWN_CYCLES) { + User->writeStartEvent(std::max(0, CyclesLeft)); + return; + } + + assert(!PartialWrite && "PartialWrite already set!"); + PartialWrite = User; + User->setDependentWrite(this); +} + +void WriteState::cycleEvent() { + // Note: CyclesLeft can be a negative number. It is an error to + // make it an unsigned quantity because users of this write may + // specify a negative ReadAdvance. + if (CyclesLeft != UNKNOWN_CYCLES) + CyclesLeft--; + + if (DependentWriteCyclesLeft) + DependentWriteCyclesLeft--; +} + +void ReadState::cycleEvent() { + // Update the total number of cycles. 
+ if (DependentWrites && TotalCycles) { + --TotalCycles; + return; + } + + // Bail out immediately if we don't know how many cycles are left. + if (CyclesLeft == UNKNOWN_CYCLES) + return; + + if (CyclesLeft) { + --CyclesLeft; + IsReady = !CyclesLeft; + } +} + +#ifndef NDEBUG +void WriteState::dump() const { + dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID " + << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; +} + +void WriteRef::dump() const { + dbgs() << "IID=" << getSourceIndex() << ' '; + if (isValid()) + getWriteState()->dump(); + else + dbgs() << "(null)"; +} +#endif + +void Instruction::dispatch(unsigned RCUToken) { + assert(Stage == IS_INVALID); + Stage = IS_AVAILABLE; + RCUTokenID = RCUToken; + + // Check if input operands are already available. + update(); +} + +void Instruction::execute() { + assert(Stage == IS_READY); + Stage = IS_EXECUTING; + + // Set the cycles left before the write-back stage. + CyclesLeft = getLatency(); + + for (WriteState &WS : getDefs()) + WS.onInstructionIssued(); + + // Transition to the "executed" stage if this is a zero-latency instruction. + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +void Instruction::forceExecuted() { + assert(Stage == IS_READY && "Invalid internal state!"); + CyclesLeft = 0; + Stage = IS_EXECUTED; +} + +void Instruction::update() { + assert(isDispatched() && "Unexpected instruction stage found!"); + + if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); })) + return; + + // A partial register write cannot complete before a dependent write. 
+ auto IsDefReady = [&](const WriteState &Def) { + if (!Def.getDependentWrite()) { + unsigned CyclesLeft = Def.getDependentWriteCyclesLeft(); + return !CyclesLeft || CyclesLeft < getLatency(); + } + return false; + }; + + if (all_of(getDefs(), IsDefReady)) + Stage = IS_READY; +} + +void Instruction::cycleEvent() { + if (isReady()) + return; + + if (isDispatched()) { + for (ReadState &Use : getUses()) + Use.cycleEvent(); + + for (WriteState &Def : getDefs()) + Def.cycleEvent(); + + update(); + return; + } + + assert(isExecuting() && "Instruction not in-flight?"); + assert(CyclesLeft && "Instruction already executed?"); + for (WriteState &Def : getDefs()) + Def.cycleEvent(); + CyclesLeft--; + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +const unsigned WriteRef::INVALID_IID = std::numeric_limits::max(); + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/LLVMBuild.txt =================================================================== --- llvm/trunk/lib/MCA/LLVMBuild.txt +++ llvm/trunk/lib/MCA/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-mca/lib/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MCA +parent = Libraries +required_libraries = MC Support Index: llvm/trunk/lib/MCA/Pipeline.cpp =================================================================== --- llvm/trunk/lib/MCA/Pipeline.cpp +++ llvm/trunk/lib/MCA/Pipeline.cpp @@ -0,0 +1,97 @@ +//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Pipeline.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/Debug.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +void Pipeline::addEventListener(HWEventListener *Listener) { + if (Listener) + Listeners.insert(Listener); + for (auto &S : Stages) + S->addListener(Listener); +} + +bool Pipeline::hasWorkToProcess() { + return any_of(Stages, [](const std::unique_ptr &S) { + return S->hasWorkToComplete(); + }); +} + +Expected Pipeline::run() { + assert(!Stages.empty() && "Unexpected empty pipeline found!"); + + do { + notifyCycleBegin(); + if (Error Err = runCycle()) + return std::move(Err); + notifyCycleEnd(); + ++Cycles; + } while (hasWorkToProcess()); + + return Cycles; +} + +Error Pipeline::runCycle() { + Error Err = ErrorSuccess(); + // Update stages before we start processing new instructions. 
+ for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr &S = *I; + Err = S->cycleStart(); + } + + // Now fetch and execute new instructions. + InstRef IR; + Stage &FirstStage = *Stages[0]; + while (!Err && FirstStage.isAvailable(IR)) + Err = FirstStage.execute(IR); + + // Update stages in preparation for a new cycle. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr &S = *I; + Err = S->cycleEnd(); + } + + return Err; +} + +void Pipeline::appendStage(std::unique_ptr S) { + assert(S && "Invalid null stage in input!"); + if (!Stages.empty()) { + Stage *Last = Stages.back().get(); + Last->setNextInSequence(S.get()); + } + + Stages.push_back(std::move(S)); +} + +void Pipeline::notifyCycleBegin() { + LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); + for (HWEventListener *Listener : Listeners) + Listener->onCycleBegin(); +} + +void Pipeline::notifyCycleEnd() { + LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); + for (HWEventListener *Listener : Listeners) + Listener->onCycleEnd(); +} +} // namespace mca. +} // namespace llvm Index: llvm/trunk/lib/MCA/Stages/DispatchStage.cpp =================================================================== --- llvm/trunk/lib/MCA/Stages/DispatchStage.cpp +++ llvm/trunk/lib/MCA/Stages/DispatchStage.cpp @@ -0,0 +1,185 @@ +//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. 
+/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/DispatchStage.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +void DispatchStage::notifyInstructionDispatched(const InstRef &IR, + ArrayRef UsedRegs, + unsigned UOps) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); + notifyEvent( + HWInstructionDispatchedEvent(IR, UsedRegs, UOps)); +} + +bool DispatchStage::checkPRF(const InstRef &IR) const { + SmallVector RegDefs; + for (const WriteState &RegDef : IR.getInstruction()->getDefs()) + RegDefs.emplace_back(RegDef.getRegisterID()); + + const unsigned RegisterMask = PRF.isAvailable(RegDefs); + // A mask with all zeroes means: register files are available. + if (RegisterMask) { + notifyEvent( + HWStallEvent(HWStallEvent::RegisterFileStall, IR)); + return false; + } + + return true; +} + +bool DispatchStage::checkRCU(const InstRef &IR) const { + const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + if (RCU.isAvailable(NumMicroOps)) + return true; + notifyEvent( + HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); + return false; +} + +bool DispatchStage::canDispatch(const InstRef &IR) const { + return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); +} + +void DispatchStage::updateRAWDependencies(ReadState &RS, + const MCSubtargetInfo &STI) { + SmallVector DependentWrites; + + // Collect all the dependent writes, and update RS internal state. + PRF.addRegisterRead(RS, DependentWrites); + + // We know that this read depends on all the writes in DependentWrites. 
+ // For each write, check if we have ReadAdvance information, and use it + // to figure out in how many cycles this read becomes available. + const ReadDescriptor &RD = RS.getDescriptor(); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); + for (WriteRef &WR : DependentWrites) { + WriteState &WS = *WR.getWriteState(); + unsigned WriteResID = WS.getWriteResourceID(); + int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); + WS.addUser(&RS, ReadAdvance); + } +} + +Error DispatchStage::dispatch(InstRef IR) { + assert(!CarryOver && "Cannot dispatch another instruction!"); + Instruction &IS = *IR.getInstruction(); + const InstrDesc &Desc = IS.getDesc(); + const unsigned NumMicroOps = Desc.NumMicroOps; + if (NumMicroOps > DispatchWidth) { + assert(AvailableEntries == DispatchWidth); + AvailableEntries = 0; + CarryOver = NumMicroOps - DispatchWidth; + CarriedOver = IR; + } else { + assert(AvailableEntries >= NumMicroOps); + AvailableEntries -= NumMicroOps; + } + + // Check if this is an optimizable reg-reg move. + bool IsEliminated = false; + if (IS.isOptimizableMove()) { + assert(IS.getDefs().size() == 1 && "Expected a single input!"); + assert(IS.getUses().size() == 1 && "Expected a single output!"); + IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]); + } + + // A dependency-breaking instruction doesn't have to wait on the register + // input operands, and it is often optimized at register renaming stage. + // Update RAW dependencies if this instruction is not a dependency-breaking + // instruction. A dependency-breaking instruction is a zero-latency + // instruction that doesn't consume hardware resources. + // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. + // + // We also don't update data dependencies for instructions that have been + // eliminated at register renaming stage. 
+ if (!IsEliminated) { + for (ReadState &RS : IS.getUses()) + updateRAWDependencies(RS, STI); + } + + // By default, a dependency-breaking zero-idiom is expected to be optimized + // at register renaming stage. That means, no physical register is allocated + // to the instruction. + SmallVector RegisterFiles(PRF.getNumRegisterFiles()); + for (WriteState &WS : IS.getDefs()) + PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); + + // Reserve slots in the RCU, and notify the instruction that it has been + // dispatched to the schedulers for execution. + IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + + // Notify listeners of the "instruction dispatched" event, + // and move IR to the next stage. + notifyInstructionDispatched(IR, RegisterFiles, + std::min(DispatchWidth, NumMicroOps)); + return moveToTheNextStage(IR); +} + +Error DispatchStage::cycleStart() { + PRF.cycleStart(); + + if (!CarryOver) { + AvailableEntries = DispatchWidth; + return ErrorSuccess(); + } + + AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; + unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries; + CarryOver -= DispatchedOpcodes; + assert(CarriedOver && "Invalid dispatched instruction"); + + SmallVector RegisterFiles(PRF.getNumRegisterFiles(), 0U); + notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes); + if (!CarryOver) + CarriedOver = InstRef(); + return ErrorSuccess(); +} + +bool DispatchStage::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + if (Required > AvailableEntries) + return false; + // The dispatch logic doesn't internally buffer instructions. It only accepts + // instructions that can be successfully moved to the next stage during this + // same cycle. 
+ return canDispatch(IR); +} + +Error DispatchStage::execute(InstRef &IR) { + assert(canDispatch(IR) && "Cannot dispatch another instruction!"); + return dispatch(IR); +} + +#ifndef NDEBUG +void DispatchStage::dump() const { + PRF.dump(); + RCU.dump(); +} +#endif +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/Stages/EntryStage.cpp =================================================================== --- llvm/trunk/lib/MCA/Stages/EntryStage.cpp +++ llvm/trunk/lib/MCA/Stages/EntryStage.cpp @@ -0,0 +1,76 @@ +//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Fetch stage of an instruction pipeline. Its sole +/// purpose in life is to produce instructions for the rest of the pipeline. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Instruction.h" + +namespace llvm { +namespace mca { + +bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; } + +bool EntryStage::isAvailable(const InstRef & /* unused */) const { + if (CurrentInstruction) + return checkNextStage(CurrentInstruction); + return false; +} + +void EntryStage::getNextInstruction() { + assert(!CurrentInstruction && "There is already an instruction to process!"); + if (!SM.hasNext()) + return; + SourceRef SR = SM.peekNext(); + std::unique_ptr Inst = llvm::make_unique(SR.second); + CurrentInstruction = InstRef(SR.first, Inst.get()); + Instructions.emplace_back(std::move(Inst)); + SM.updateNext(); +} + +llvm::Error EntryStage::execute(InstRef & /*unused */) { + assert(CurrentInstruction && "There is no instruction to process!"); + if (llvm::Error Val = moveToTheNextStage(CurrentInstruction)) + return Val; + + // Move the program counter. + CurrentInstruction.invalidate(); + getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error EntryStage::cycleStart() { + if (!CurrentInstruction) + getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error EntryStage::cycleEnd() { + // Find the first instruction which hasn't been retired. + auto Range = make_range(&Instructions[NumRetired], Instructions.end()); + auto It = find_if(Range, [](const std::unique_ptr &I) { + return !I->isRetired(); + }); + + NumRetired = std::distance(Instructions.begin(), It); + // Erase instructions up to the first that hasn't been retired. 
+ if ((NumRetired * 2) >= Instructions.size()) { + Instructions.erase(Instructions.begin(), It); + NumRetired = 0; + } + + return llvm::ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/Stages/ExecuteStage.cpp =================================================================== --- llvm/trunk/lib/MCA/Stages/ExecuteStage.cpp +++ llvm/trunk/lib/MCA/Stages/ExecuteStage.cpp @@ -0,0 +1,219 @@ +//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of an instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/ExecuteStage.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { + switch (Status) { + case Scheduler::SC_LOAD_QUEUE_FULL: + return HWStallEvent::LoadQueueFull; + case Scheduler::SC_STORE_QUEUE_FULL: + return HWStallEvent::StoreQueueFull; + case Scheduler::SC_BUFFERS_FULL: + return HWStallEvent::SchedulerQueueFull; + case Scheduler::SC_DISPATCH_GROUP_STALL: + return HWStallEvent::DispatchGroupStall; + case Scheduler::SC_AVAILABLE: + return HWStallEvent::Invalid; + } + + llvm_unreachable("Don't know how to process this StallKind!"); +} + +bool ExecuteStage::isAvailable(const InstRef &IR) const { + if (Scheduler::Status S = HWS.isAvailable(IR)) { + HWStallEvent::GenericEventType ET = toHWStallEventType(S); + 
notifyEvent(HWStallEvent(ET, IR)); + return false; + } + + return true; +} + +Error ExecuteStage::issueInstruction(InstRef &IR) { + SmallVector, 4> Used; + SmallVector Ready; + HWS.issueInstruction(IR, Used, Ready); + + notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); + notifyInstructionIssued(IR, Used); + if (IR.getInstruction()->isExecuted()) { + notifyInstructionExecuted(IR); + // FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &I : Ready) + notifyInstructionReady(I); + return ErrorSuccess(); +} + +Error ExecuteStage::issueReadyInstructions() { + InstRef IR = HWS.select(); + while (IR) { + if (Error Err = issueInstruction(IR)) + return Err; + + // Select the next instruction to issue. + IR = HWS.select(); + } + + return ErrorSuccess(); +} + +Error ExecuteStage::cycleStart() { + SmallVector Freed; + SmallVector Executed; + SmallVector Ready; + + HWS.cycleEvent(Freed, Executed, Ready); + + for (const ResourceRef &RR : Freed) + notifyResourceAvailable(RR); + + for (InstRef &IR : Executed) { + notifyInstructionExecuted(IR); + // FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &IR : Ready) + notifyInstructionReady(IR); + + return issueReadyInstructions(); +} + +#ifndef NDEBUG +static void verifyInstructionEliminated(const InstRef &IR) { + const Instruction &Inst = *IR.getInstruction(); + assert(Inst.isEliminated() && "Instruction was not eliminated!"); + assert(Inst.isReady() && "Instruction in an inconsistent state!"); + + // Ensure that instructions eliminated at register renaming stage are in a + // consistent state. 
+ const InstrDesc &Desc = Inst.getDesc(); + assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!"); +} +#endif + +Error ExecuteStage::handleInstructionEliminated(InstRef &IR) { +#ifndef NDEBUG + verifyInstructionEliminated(IR); +#endif + notifyInstructionReady(IR); + notifyInstructionIssued(IR, {}); + IR.getInstruction()->forceExecuted(); + notifyInstructionExecuted(IR); + return moveToTheNextStage(IR); +} + +// Schedule the instruction for execution on the hardware. +Error ExecuteStage::execute(InstRef &IR) { + assert(isAvailable(IR) && "Scheduler is not available!"); + +#ifndef NDEBUG + // Ensure that the HWS has not stored this instruction in its queues. + HWS.sanityCheck(IR); +#endif + + if (IR.getInstruction()->isEliminated()) + return handleInstructionEliminated(IR); + + // Reserve a slot in each buffered resource. Also, mark units with + // BufferSize=0 as reserved. Resources with a buffer size of zero will only + // be released after MCIS is issued, and all the ResourceCycles for those + // units have been consumed. + HWS.dispatch(IR); + notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); + if (!HWS.isReady(IR)) + return ErrorSuccess(); + + // If we did not return early, then the scheduler is ready for execution. + notifyInstructionReady(IR); + + // If we cannot issue immediately, the HWS will add IR to its ready queue for + // execution later, so we must return early here. + if (!HWS.mustIssueImmediately(IR)) + return ErrorSuccess(); + + // Issue IR to the underlying pipelines. 
+ return issueInstruction(IR); +} + +void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); + notifyEvent( + HWInstructionEvent(HWInstructionEvent::Executed, IR)); +} + +void ExecuteStage::notifyInstructionReady(const InstRef &IR) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); + notifyEvent( + HWInstructionEvent(HWInstructionEvent::Ready, IR)); +} + +void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const { + LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' + << RR.second << "]\n"); + for (HWEventListener *Listener : getListeners()) + Listener->onResourceAvailable(RR); +} + +void ExecuteStage::notifyInstructionIssued( + const InstRef &IR, + ArrayRef> Used) const { + LLVM_DEBUG({ + dbgs() << "[E] Instruction Issued: #" << IR << '\n'; + for (const std::pair &Resource : Used) { + dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' + << Resource.first.second << "], "; + dbgs() << "cycles: " << Resource.second << '\n'; + } + }); + notifyEvent(HWInstructionIssuedEvent(IR, Used)); +} + +void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, + bool Reserved) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.Buffers.empty()) + return; + + SmallVector BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); + std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), + [&](uint64_t Op) { return HWS.getResourceID(Op); }); + if (Reserved) { + for (HWEventListener *Listener : getListeners()) + Listener->onReservedBuffers(IR, BufferIDs); + return; + } + + for (HWEventListener *Listener : getListeners()) + Listener->onReleasedBuffers(IR, BufferIDs); +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/Stages/InstructionTables.cpp =================================================================== --- llvm/trunk/lib/MCA/Stages/InstructionTables.cpp 
+++ llvm/trunk/lib/MCA/Stages/InstructionTables.cpp @@ -0,0 +1,69 @@ +//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the method InstructionTables::execute(). +/// Method execute() prints a theoretical resource pressure distribution based +/// on the information available in the scheduling model, and without running +/// the pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/InstructionTables.h" + +namespace llvm { +namespace mca { + +Error InstructionTables::execute(InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + UsedResources.clear(); + + // Identify the resources consumed by this instruction. + for (const std::pair Resource : Desc.Resources) { + // Skip zero-cycle resources (i.e., unused resources). + if (!Resource.second.size()) + continue; + unsigned Cycles = Resource.second.size(); + unsigned Index = std::distance( + Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first)); + const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index); + unsigned NumUnits = ProcResource.NumUnits; + if (!ProcResource.SubUnitsIdxBegin) { + // The number of cycles consumed by each unit. + for (unsigned I = 0, E = NumUnits; I < E; ++I) { + ResourceRef ResourceUnit = std::make_pair(Index, 1U << I); + UsedResources.emplace_back( + std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits))); + } + continue; + } + + // This is a group. Obtain the set of resources contained in this + // group. Some of these resources may implement multiple units. + // Uniformly distribute Cycles across all of the units. 
+ for (unsigned I1 = 0; I1 < NumUnits; ++I1) { + unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1]; + const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx); + // Compute the number of cycles consumed by each resource unit. + for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) { + ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2); + UsedResources.emplace_back(std::make_pair( + ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits))); + } + } + } + + // Send a fake instruction issued event to all the views. + HWInstructionIssuedEvent Event(IR, UsedResources); + notifyEvent(Event); + return ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/Stages/RetireStage.cpp =================================================================== --- llvm/trunk/lib/MCA/Stages/RetireStage.cpp +++ llvm/trunk/lib/MCA/Stages/RetireStage.cpp @@ -0,0 +1,62 @@ +//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the retire stage of an instruction pipeline. +/// The RetireStage represents the process logic that interacts with the +/// simulated RetireControlUnit hardware. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/RetireStage.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +llvm::Error RetireStage::cycleStart() { + if (RCU.isEmpty()) + return llvm::ErrorSuccess(); + + const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); + unsigned NumRetired = 0; + while (!RCU.isEmpty()) { + if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) + break; + const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); + if (!Current.Executed) + break; + RCU.consumeCurrentToken(); + notifyInstructionRetired(Current.IR); + NumRetired++; + } + + return llvm::ErrorSuccess(); +} + +llvm::Error RetireStage::execute(InstRef &IR) { + RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); + return llvm::ErrorSuccess(); +} + +void RetireStage::notifyInstructionRetired(const InstRef &IR) const { + LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n'); + llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); + const Instruction &Inst = *IR.getInstruction(); + + for (const WriteState &WS : Inst.getDefs()) + PRF.removeRegisterWrite(WS, FreedRegs); + notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/Stages/Stage.cpp =================================================================== --- llvm/trunk/lib/MCA/Stages/Stage.cpp +++ llvm/trunk/lib/MCA/Stages/Stage.cpp @@ -0,0 +1,29 @@ +//===---------------------- Stage.cpp ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage. 
+/// A chain of stages compose an instruction pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/Stage.h" + +namespace llvm { +namespace mca { + +// Pin the vtable here in the implementation file. +Stage::~Stage() = default; + +void Stage::addListener(HWEventListener *Listener) { + Listeners.insert(Listener); +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/lib/MCA/Support.cpp =================================================================== --- llvm/trunk/lib/MCA/Support.cpp +++ llvm/trunk/lib/MCA/Support.cpp @@ -0,0 +1,79 @@ +//===--------------------- Support.cpp --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a few helper functions used by various pipeline +/// components. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Support.h" +#include "llvm/MC/MCSchedule.h" + +namespace llvm { +namespace mca { + +void computeProcResourceMasks(const MCSchedModel &SM, + SmallVectorImpl &Masks) { + unsigned ProcResourceID = 0; + + // Create a unique bitmask for every processor resource unit. + // Skip resource at index 0, since it always references 'InvalidUnit'. + Masks.resize(SM.getNumProcResourceKinds()); + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + ProcResourceID++; + } + + // Create a unique bitmask for every processor resource group. 
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (!Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + for (unsigned U = 0; U < Desc.NumUnits; ++U) { + uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]]; + Masks[I] |= OtherMask; + } + ProcResourceID++; + } +} + +double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, + unsigned NumMicroOps, + ArrayRef ProcResourceUsage) { + // The block throughput is bounded from above by the hardware dispatch + // throughput. That is because the DispatchWidth is an upper bound on the + // number of opcodes that can be part of a single dispatch group. + double Max = static_cast(NumMicroOps) / DispatchWidth; + + // The block throughput is also limited by the amount of hardware parallelism. + // The number of available resource units affects the resource pressure + // distribution, as well as how many blocks can be executed every cycle. + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + unsigned ResourceCycles = ProcResourceUsage[I]; + if (!ResourceCycles) + continue; + + const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); + double Throughput = static_cast(ResourceCycles) / MCDesc.NumUnits; + Max = std::max(Max, Throughput); + } + + // The block reciprocal throughput is computed as the MAX of: + // - (NumMicroOps / DispatchWidth) + // - (NumUnits / ResourceCycles) for every consumed processor resource. 
+ return Max; +} + +} // namespace mca +} // namespace llvm Index: llvm/trunk/tools/llvm-mca/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-mca/CMakeLists.txt +++ llvm/trunk/tools/llvm-mca/CMakeLists.txt @@ -6,6 +6,7 @@ AllTargetsDescs AllTargetsDisassemblers AllTargetsInfos + MCA MC MCParser Support @@ -28,5 +29,3 @@ ) set(LLVM_MCA_SOURCE_DIR ${CURRENT_SOURCE_DIR}) -add_subdirectory(lib) -target_link_libraries(llvm-mca PRIVATE LLVMMCA) Index: llvm/trunk/tools/llvm-mca/LLVMBuild.txt =================================================================== --- llvm/trunk/tools/llvm-mca/LLVMBuild.txt +++ llvm/trunk/tools/llvm-mca/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-mca parent = Tools -required_libraries = MC MCParser Support all-targets +required_libraries = MC MCA MCParser Support all-targets Index: llvm/trunk/tools/llvm-mca/PipelinePrinter.h =================================================================== --- llvm/trunk/tools/llvm-mca/PipelinePrinter.h +++ llvm/trunk/tools/llvm-mca/PipelinePrinter.h @@ -17,9 +17,9 @@ #ifndef LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H #define LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H -#include "Pipeline.h" #include "Views/View.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/MCA/Pipeline.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "llvm-mca" Index: llvm/trunk/tools/llvm-mca/Views/SummaryView.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Views/SummaryView.cpp +++ llvm/trunk/tools/llvm-mca/Views/SummaryView.cpp @@ -14,8 +14,8 @@ //===----------------------------------------------------------------------===// #include "Views/SummaryView.h" -#include "Support.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/MCA/Support.h" #include "llvm/Support/Format.h" namespace llvm { Index: llvm/trunk/tools/llvm-mca/Views/View.h =================================================================== --- 
llvm/trunk/tools/llvm-mca/Views/View.h +++ llvm/trunk/tools/llvm-mca/Views/View.h @@ -16,7 +16,7 @@ #ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H #define LLVM_TOOLS_LLVM_MCA_VIEW_H -#include "HWEventListener.h" +#include "llvm/MCA/HWEventListener.h" #include "llvm/Support/raw_ostream.h" namespace llvm { Index: llvm/trunk/tools/llvm-mca/include/Context.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Context.h +++ llvm/trunk/tools/llvm-mca/include/Context.h @@ -1,68 +0,0 @@ -//===---------------------------- Context.h ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a class for holding ownership of various simulated -/// hardware units. A Context also provides a utility routine for constructing -/// a default out-of-order pipeline with fetch, dispatch, execute, and retire -/// stages. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_CONTEXT_H -#define LLVM_TOOLS_LLVM_MCA_CONTEXT_H -#include "HardwareUnits/HardwareUnit.h" -#include "InstrBuilder.h" -#include "Pipeline.h" -#include "SourceMgr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include - -namespace llvm { -namespace mca { - -/// This is a convenience struct to hold the parameters necessary for creating -/// the pre-built "default" out-of-order pipeline. 
-struct PipelineOptions { - PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS, - bool NoAlias) - : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS), - StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} - unsigned DispatchWidth; - unsigned RegisterFileSize; - unsigned LoadQueueSize; - unsigned StoreQueueSize; - bool AssumeNoAlias; -}; - -class Context { - SmallVector, 4> Hardware; - const MCRegisterInfo &MRI; - const MCSubtargetInfo &STI; - -public: - Context(const MCRegisterInfo &R, const MCSubtargetInfo &S) : MRI(R), STI(S) {} - Context(const Context &C) = delete; - Context &operator=(const Context &C) = delete; - - void addHardwareUnit(std::unique_ptr H) { - Hardware.push_back(std::move(H)); - } - - /// Construct a basic pipeline for simulating an out-of-order pipeline. - /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. - std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, - InstrBuilder &IB, - SourceMgr &SrcMgr); -}; - -} // namespace mca -} // namespace llvm -#endif // LLVM_TOOLS_LLVM_MCA_CONTEXT_H Index: llvm/trunk/tools/llvm-mca/include/HWEventListener.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HWEventListener.h +++ llvm/trunk/tools/llvm-mca/include/HWEventListener.h @@ -1,156 +0,0 @@ -//===----------------------- HWEventListener.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the main interface for hardware event listeners. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H -#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H - -#include "Instruction.h" -#include "Support.h" -#include "llvm/ADT/ArrayRef.h" - -namespace llvm { -namespace mca { - -// An HWInstructionEvent represents state changes of instructions that -// listeners might be interested in. Listeners can choose to ignore any event -// they are not interested in. -class HWInstructionEvent { -public: - // This is the list of event types that are shared by all targets, that - // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent, - // ...) and generic Views can manipulate. - // Subtargets are free to define additional event types, that are goin to be - // handled by generic components as opaque values, but can still be - // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage, - // DispatchStage, ...) and interpreted by subtarget-specific EventListener - // implementations. - enum GenericEventType { - Invalid = 0, - // Events generated by the Retire Control Unit. - Retired, - // Events generated by the Scheduler. - Ready, - Issued, - Executed, - // Events generated by the Dispatch logic. - Dispatched, - - LastGenericEventType, - }; - - HWInstructionEvent(unsigned type, const InstRef &Inst) - : Type(type), IR(Inst) {} - - // The event type. The exact meaning depends on the subtarget. - const unsigned Type; - - // The instruction this event was generated for. 
- const InstRef &IR; -}; - -class HWInstructionIssuedEvent : public HWInstructionEvent { -public: - using ResourceRef = std::pair; - HWInstructionIssuedEvent(const InstRef &IR, - ArrayRef> UR) - : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {} - - ArrayRef> UsedResources; -}; - -class HWInstructionDispatchedEvent : public HWInstructionEvent { -public: - HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef Regs, - unsigned UOps) - : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), - UsedPhysRegs(Regs), MicroOpcodes(UOps) {} - // Number of physical register allocated for this instruction. There is one - // entry per register file. - ArrayRef UsedPhysRegs; - // Number of micro opcodes dispatched. - // This field is often set to the total number of micro-opcodes specified by - // the instruction descriptor of IR. - // The only exception is when IR declares a number of micro opcodes - // which exceeds the processor DispatchWidth, and - by construction - it - // requires multiple cycles to be fully dispatched. In that particular case, - // the dispatch logic would generate more than one dispatch event (one per - // cycle), and each event would declare how many micro opcodes are effectively - // been dispatched to the schedulers. - unsigned MicroOpcodes; -}; - -class HWInstructionRetiredEvent : public HWInstructionEvent { -public: - HWInstructionRetiredEvent(const InstRef &IR, ArrayRef Regs) - : HWInstructionEvent(HWInstructionEvent::Retired, IR), - FreedPhysRegs(Regs) {} - // Number of register writes that have been architecturally committed. There - // is one entry per register file. - ArrayRef FreedPhysRegs; -}; - -// A HWStallEvent represents a pipeline stall caused by the lack of hardware -// resources. -class HWStallEvent { -public: - enum GenericEventType { - Invalid = 0, - // Generic stall events generated by the DispatchStage. - RegisterFileStall, - RetireControlUnitStall, - // Generic stall events generated by the Scheduler. 
- DispatchGroupStall, - SchedulerQueueFull, - LoadQueueFull, - StoreQueueFull, - LastGenericEvent - }; - - HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {} - - // The exact meaning of the stall event type depends on the subtarget. - const unsigned Type; - - // The instruction this event was generated for. - const InstRef &IR; -}; - -class HWEventListener { -public: - // Generic events generated by the pipeline. - virtual void onCycleBegin() {} - virtual void onCycleEnd() {} - - virtual void onEvent(const HWInstructionEvent &Event) {} - virtual void onEvent(const HWStallEvent &Event) {} - - using ResourceRef = std::pair; - virtual void onResourceAvailable(const ResourceRef &RRef) {} - - // Events generated by the Scheduler when buffered resources are - // consumed/freed for an instruction. - virtual void onReservedBuffers(const InstRef &Inst, - ArrayRef Buffers) {} - virtual void onReleasedBuffers(const InstRef &Inst, - ArrayRef Buffers) {} - - virtual ~HWEventListener() {} - -private: - virtual void anchor(); -}; -} // namespace mca -} // namespace llvm - -#endif Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h @@ -1,33 +0,0 @@ -//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a base class for describing a simulated hardware -/// unit. These units are used to construct a simulated backend. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H -#define LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H - -namespace llvm { -namespace mca { - -class HardwareUnit { - HardwareUnit(const HardwareUnit &H) = delete; - HardwareUnit &operator=(const HardwareUnit &H) = delete; - -public: - HardwareUnit() = default; - virtual ~HardwareUnit(); -}; - -} // namespace mca -} // namespace llvm -#endif // LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h @@ -1,207 +0,0 @@ -//===------------------------- LSUnit.h --------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A Load/Store unit class that models load/store queues and that implements -/// a simple weak memory consistency model. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H -#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H - -#include "HardwareUnits/HardwareUnit.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/MC/MCSchedule.h" - -namespace llvm { -namespace mca { - -class InstRef; -class Scheduler; - -/// A Load/Store Unit implementing a load and store queues. -/// -/// This class implements a load queue and a store queue to emulate the -/// out-of-order execution of memory operations. -/// Each load (or store) consumes an entry in the load (or store) queue. 
-/// -/// Rules are: -/// 1) A younger load is allowed to pass an older load only if there are no -/// stores nor barriers in between the two loads. -/// 2) An younger store is not allowed to pass an older store. -/// 3) A younger store is not allowed to pass an older load. -/// 4) A younger load is allowed to pass an older store only if the load does -/// not alias with the store. -/// -/// This class optimistically assumes that loads don't alias store operations. -/// Under this assumption, younger loads are always allowed to pass older -/// stores (this would only affects rule 4). -/// Essentially, this class doesn't perform any sort alias analysis to -/// identify aliasing loads and stores. -/// -/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be -/// set to `false` by the constructor of LSUnit. -/// -/// Note that this class doesn't know about the existence of different memory -/// types for memory operations (example: write-through, write-combining, etc.). -/// Derived classes are responsible for implementing that extra knowledge, and -/// provide different sets of rules for loads and stores by overriding method -/// `isReady()`. -/// To emulate a write-combining memory type, rule 2. must be relaxed in a -/// derived class to enable the reordering of non-aliasing store operations. -/// -/// No assumptions are made by this class on the size of the store buffer. This -/// class doesn't know how to identify cases where store-to-load forwarding may -/// occur. -/// -/// LSUnit doesn't attempt to predict whether a load or store hits or misses -/// the L1 cache. To be more specific, LSUnit doesn't know anything about -/// cache hierarchy and memory types. -/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the -/// scheduling model provides an "optimistic" load-to-use latency (which usually -/// matches the load-to-use latency for when there is a hit in the L1D). -/// Derived classes may expand this knowledge. 
-/// -/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor -/// memory-barrier like instructions. -/// LSUnit conservatively assumes that an instruction which `mayLoad` and has -/// `unmodeled side effects` behave like a "soft" load-barrier. That means, it -/// serializes loads without forcing a flush of the load queue. -/// Similarly, instructions that both `mayStore` and have `unmodeled side -/// effects` are treated like store barriers. A full memory -/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side -/// effects. This is obviously inaccurate, but this is the best that we can do -/// at the moment. -/// -/// Each load/store barrier consumes one entry in the load/store queue. A -/// load/store barrier enforces ordering of loads/stores: -/// - A younger load cannot pass a load barrier. -/// - A younger store cannot pass a store barrier. -/// -/// A younger load has to wait for the memory load barrier to execute. -/// A load/store barrier is "executed" when it becomes the oldest entry in -/// the load/store queue(s). That also means, all the older loads/stores have -/// already been executed. -class LSUnit : public HardwareUnit { - // Load queue size. - // LQ_Size == 0 means that there are infinite slots in the load queue. - unsigned LQ_Size; - - // Store queue size. - // SQ_Size == 0 means that there are infinite slots in the store queue. - unsigned SQ_Size; - - // If true, loads will never alias with stores. This is the default. - bool NoAlias; - - // When a `MayLoad` instruction is dispatched to the schedulers for execution, - // the LSUnit reserves an entry in the `LoadQueue` for it. - // - // LoadQueue keeps track of all the loads that are in-flight. A load - // instruction is eventually removed from the LoadQueue when it reaches - // completion stage. That means, a load leaves the queue whe it is 'executed', - // and its value can be forwarded on the data path to outside units. 
- // - // This class doesn't know about the latency of a load instruction. So, it - // conservatively/pessimistically assumes that the latency of a load opcode - // matches the instruction latency. - // - // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses), - // and load/store conflicts, the latency of a load is determined by the depth - // of the load pipeline. So, we could use field `LoadLatency` in the - // MCSchedModel to model that latency. - // Field `LoadLatency` often matches the so-called 'load-to-use' latency from - // L1D, and it usually already accounts for any extra latency due to data - // forwarding. - // When doing throughput analysis, `LoadLatency` is likely to - // be a better predictor of load latency than instruction latency. This is - // particularly true when simulating code with temporal/spatial locality of - // memory accesses. - // Using `LoadLatency` (instead of the instruction latency) is also expected - // to improve the load queue allocation for long latency instructions with - // folded memory operands (See PR39829). - // - // FIXME: On some processors, load/store operations are split into multiple - // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but - // not 256-bit data types. So, a 256-bit load is effectively split into two - // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For - // simplicity, this class optimistically assumes that a load instruction only - // consumes one entry in the LoadQueue. Similarly, store instructions only - // consume a single entry in the StoreQueue. - // In future, we should reassess the quality of this design, and consider - // alternative approaches that let instructions specify the number of - // load/store queue entries which they consume at dispatch stage (See - // PR39830). 
- SmallSet LoadQueue; - SmallSet StoreQueue; - - void assignLQSlot(unsigned Index); - void assignSQSlot(unsigned Index); - bool isReadyNoAlias(unsigned Index) const; - - // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is - // conservatively treated as a store barrier. It forces older store to be - // executed before newer stores are issued. - SmallSet StoreBarriers; - - // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is - // conservatively treated as a load barrier. It forces older loads to execute - // before newer loads are issued. - SmallSet LoadBarriers; - - bool isSQEmpty() const { return StoreQueue.empty(); } - bool isLQEmpty() const { return LoadQueue.empty(); } - bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; } - bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; } - -public: - LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0, - bool AssumeNoAlias = false); - -#ifndef NDEBUG - void dump() const; -#endif - - enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL }; - - // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve - // IR. It also returns LSU_AVAILABLE if IR is not a memory operation. - Status isAvailable(const InstRef &IR) const; - - // Allocates load/store queue resources for IR. - // - // This method assumes that a previous call to `isAvailable(IR)` returned - // LSU_AVAILABLE, and that IR is a memory operation. - void dispatch(const InstRef &IR); - - // By default, rules are: - // 1. A store may not pass a previous store. - // 2. A load may not pass a previous store unless flag 'NoAlias' is set. - // 3. A load may pass a previous load. - // 4. A store may not pass a previous load (regardless of flag 'NoAlias'). - // 5. A load has to wait until an older load barrier is fully executed. - // 6. A store has to wait until an older store barrier is fully executed. 
- virtual bool isReady(const InstRef &IR) const; - - // Load and store instructions are tracked by their corresponding queues from - // dispatch until the "instruction executed" event. - // Only when a load instruction reaches the 'Executed' stage, its value - // becomes available to the users. At that point, the load no longer needs to - // be tracked by the load queue. - // FIXME: For simplicity, we optimistically assume a similar behavior for - // store instructions. In practice, store operations don't tend to leave the - // store queue until they reach the 'Retired' stage (See PR39830). - void onInstructionExecuted(const InstRef &IR); -}; - -} // namespace mca -} // namespace llvm - -#endif Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h @@ -1,239 +0,0 @@ -//===--------------------- RegisterFile.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a register mapping file class. This class is responsible -/// for managing hardware register files and the tracking of data dependencies -/// between registers. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H -#define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H - -#include "HardwareUnits/HardwareUnit.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace mca { - -class ReadState; -class WriteState; -class WriteRef; - -/// Manages hardware register files, and tracks register definitions for -/// register renaming purposes. -class RegisterFile : public HardwareUnit { - const MCRegisterInfo &MRI; - - // class RegisterMappingTracker is a physical register file (PRF) descriptor. - // There is one RegisterMappingTracker for every PRF definition in the - // scheduling model. - // - // An instance of RegisterMappingTracker tracks the number of physical - // registers available for renaming. It also tracks the number of register - // moves eliminated per cycle. - struct RegisterMappingTracker { - // The total number of physical registers that are available in this - // register file for register renaming purpouses. A value of zero for this - // field means: this register file has an unbounded number of physical - // registers. - const unsigned NumPhysRegs; - // Number of physical registers that are currently in use. - unsigned NumUsedPhysRegs; - - // Maximum number of register moves that can be eliminated by this PRF every - // cycle. A value of zero means that there is no limit in the number of - // moves which can be eliminated every cycle. - const unsigned MaxMoveEliminatedPerCycle; - - // Number of register moves eliminated during this cycle. - // - // This value is increased by one every time a register move is eliminated. - // Every new cycle, this value is reset to zero. 
- // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if - // NumMoveEliminated is less than MaxMoveEliminatedPerCycle. - unsigned NumMoveEliminated; - - // If set, move elimination is restricted to zero-register moves only. - bool AllowZeroMoveEliminationOnly; - - RegisterMappingTracker(unsigned NumPhysRegisters, - unsigned MaxMoveEliminated = 0U, - bool AllowZeroMoveElimOnly = false) - : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0), - MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U), - AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly) {} - }; - - // A vector of register file descriptors. This set always contains at least - // one entry. Entry at index #0 is reserved. That entry describes a register - // file with an unbounded number of physical registers that "sees" all the - // hardware registers declared by the target (i.e. all the register - // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is - // the target name). - // - // Users can limit the number of physical registers that are available in - // regsiter file #0 specifying command line flag `-register-file-size=`. - SmallVector RegisterFiles; - - // This type is used to propagate information about the owner of a register, - // and the cost of allocating it in the PRF. Register cost is defined as the - // number of physical registers consumed by the PRF to allocate a user - // register. - // - // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical - // registers. So, the cost of allocating a YMM register in BtVer2 is 2. - using IndexPlusCostPairTy = std::pair; - - // Struct RegisterRenamingInfo is used to map logical registers to register - // files. - // - // There is a RegisterRenamingInfo object for every logical register defined - // by the target. RegisteRenamingInfo objects are stored into vector - // `RegisterMappings`, and MCPhysReg IDs can be used to reference - // elements in that vector. 
- // - // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost` - // specifies both the owning PRF, as well as the number of physical registers - // consumed at register renaming stage. - // - // Field `AllowMoveElimination` is set for registers that are used as - // destination by optimizable register moves. - // - // Field `AliasRegID` is set by writes from register moves that have been - // eliminated at register renaming stage. A move eliminated at register - // renaming stage is effectively bypassed, and its write aliases the source - // register definition. - struct RegisterRenamingInfo { - IndexPlusCostPairTy IndexPlusCost; - MCPhysReg RenameAs; - MCPhysReg AliasRegID; - bool AllowMoveElimination; - RegisterRenamingInfo() - : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U), AliasRegID(0U), - AllowMoveElimination(false) {} - }; - - // RegisterMapping objects are mainly used to track physical register - // definitions and resolve data dependencies. - // - // Every register declared by the Target is associated with an instance of - // RegisterMapping. RegisterMapping objects keep track of writes to a logical - // register. That information is used by class RegisterFile to resolve data - // dependencies, and correctly set latencies for register uses. - // - // This implementation does not allow overlapping register files. The only - // register file that is allowed to overlap with other register files is - // register file #0. If we exclude register #0, every register is "owned" by - // at most one register file. - using RegisterMapping = std::pair; - - // There is one entry per each register defined by the target. - std::vector RegisterMappings; - - // Used to track zero registers. There is one bit for each register defined by - // the target. Bits are set for registers that are known to be zero. - APInt ZeroRegisters; - - // This method creates a new register file descriptor. 
- // The new register file owns all of the registers declared by register - // classes in the 'RegisterClasses' set. - // - // Processor models allow the definition of RegisterFile(s) via tablegen. For - // example, this is a tablegen definition for a x86 register file for - // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1 - // physical register). - // - // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]> - // - // Here FPRegisterFile contains all the registers defined by register class - // VR128RegClass and VR256RegClass. FPRegisterFile implements 60 - // registers which can be used for register renaming purpose. - void addRegisterFile(const MCRegisterFileDesc &RF, - ArrayRef Entries); - - // Consumes physical registers in each register file specified by the - // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`. - void allocatePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef UsedPhysRegs); - - // Releases previously allocated physical registers from the register file(s). - // This method is called from `invalidateRegisterMapping()`. - void freePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef FreedPhysRegs); - - // Collects writes that are in a RAW dependency with RS. - // This method is called from `addRegisterRead()`. - void collectWrites(const ReadState &RS, - SmallVectorImpl &Writes) const; - - // Create an instance of RegisterMappingTracker for every register file - // specified by the processor model. - // If no register file is specified, then this method creates a default - // register file with an unbounded number of physical registers. - void initialize(const MCSchedModel &SM, unsigned NumRegs); - -public: - RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri, - unsigned NumRegs = 0); - - // This method updates the register mappings inserting a new register - // definition. 
This method is also responsible for updating the number of - // allocated physical registers in each register file modified by the write. - // No physical regiser is allocated if this write is from a zero-idiom. - void addRegisterWrite(WriteRef Write, MutableArrayRef UsedPhysRegs); - - // Collect writes that are in a data dependency with RS, and update RS - // internal state. - void addRegisterRead(ReadState &RS, SmallVectorImpl &Writes) const; - - // Removes write \param WS from the register mappings. - // Physical registers may be released to reflect this update. - // No registers are released if this write is from a zero-idiom. - void removeRegisterWrite(const WriteState &WS, - MutableArrayRef FreedPhysRegs); - - // Returns true if a move from RS to WS can be eliminated. - // On success, it updates WriteState by setting flag `WS.isEliminated`. - // If RS is a read from a zero register, and WS is eliminated, then - // `WS.WritesZero` is also set, so that method addRegisterWrite() would not - // reserve a physical register for it. - bool tryEliminateMove(WriteState &WS, ReadState &RS); - - // Checks if there are enough physical registers in the register files. - // Returns a "response mask" where each bit represents the response from a - // different register file. A mask of all zeroes means that all register - // files are available. Otherwise, the mask can be used to identify which - // register file was busy. This sematic allows us to classify dispatch - // stalls caused by the lack of register file resources. - // - // Current implementation can simulate up to 32 register files (including the - // special register file at index #0). - unsigned isAvailable(ArrayRef Regs) const; - - // Returns the number of PRFs implemented by this processor. - unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } - - // Notify each PRF that a new cycle just started. 
- void cycleStart(); - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h @@ -1,360 +0,0 @@ -//===--------------------- ResourceManager.h --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// The classes here represent processor resource units and their management -/// strategy. These classes are managed by the Scheduler. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H -#define LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H - -#include "Instruction.h" -#include "Support.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace llvm { -namespace mca { - -/// Used to notify the internal state of a processor resource. -/// -/// A processor resource is available if it is not reserved, and there are -/// available slots in the buffer. A processor resource is unavailable if it -/// is either reserved, or the associated buffer is full. A processor resource -/// with a buffer size of -1 is always available if it is not reserved. -/// -/// Values of type ResourceStateEvent are returned by method -/// ResourceState::isBufferAvailable(), which is used to query the internal -/// state of a resource. 
-/// -/// The naming convention for resource state events is: -/// * Event names start with prefix RS_ -/// * Prefix RS_ is followed by a string describing the actual resource state. -enum ResourceStateEvent { - RS_BUFFER_AVAILABLE, - RS_BUFFER_UNAVAILABLE, - RS_RESERVED -}; - -/// Resource allocation strategy used by hardware scheduler resources. -class ResourceStrategy { - ResourceStrategy(const ResourceStrategy &) = delete; - ResourceStrategy &operator=(const ResourceStrategy &) = delete; - -public: - ResourceStrategy() {} - virtual ~ResourceStrategy(); - - /// Selects a processor resource unit from a ReadyMask. - virtual uint64_t select(uint64_t ReadyMask) = 0; - - /// Called by the ResourceManager when a processor resource group, or a - /// processor resource with multiple units has become unavailable. - /// - /// The default strategy uses this information to bias its selection logic. - virtual void used(uint64_t ResourceMask) {} -}; - -/// Default resource allocation strategy used by processor resource groups and -/// processor resources with multiple units. -class DefaultResourceStrategy final : public ResourceStrategy { - /// A Mask of resource unit identifiers. - /// - /// There is one bit set for every available resource unit. - /// It defaults to the value of field ResourceSizeMask in ResourceState. - const unsigned ResourceUnitMask; - - /// A simple round-robin selector for processor resource units. - /// Each bit of this mask identifies a sub resource within a group. - /// - /// As an example, lets assume that this is a default policy for a - /// processor resource group composed by the following three units: - /// ResourceA -- 0b001 - /// ResourceB -- 0b010 - /// ResourceC -- 0b100 - /// - /// Field NextInSequenceMask is used to select the next unit from the set of - /// resource units. It defaults to the value of field `ResourceUnitMasks` (in - /// this example, it defaults to mask '0b111'). 
- /// - /// The round-robin selector would firstly select 'ResourceC', then - /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the - /// corresponding bit in NextInSequenceMask is cleared. For example, if - /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes - /// 0xb011. - /// - /// When NextInSequenceMask becomes zero, it is automatically reset to the - /// default value (i.e. ResourceUnitMask). - uint64_t NextInSequenceMask; - - /// This field is used to track resource units that are used (i.e. selected) - /// by other groups other than the one associated with this strategy object. - /// - /// In LLVM processor resource groups are allowed to partially (or fully) - /// overlap. That means, a same unit may be visible to multiple groups. - /// This field keeps track of uses that have originated from outside of - /// this group. The idea is to bias the selection strategy, so that resources - /// that haven't been used by other groups get prioritized. - /// - /// The end goal is to (try to) keep the resource distribution as much uniform - /// as possible. By construction, this mask only tracks one-level of resource - /// usage. Therefore, this strategy is expected to be less accurate when same - /// units are used multiple times by other groups within a single round of - /// select. - /// - /// Note: an LRU selector would have a better accuracy at the cost of being - /// slightly more expensive (mostly in terms of runtime cost). Methods - /// 'select' and 'used', are always in the hot execution path of llvm-mca. - /// Therefore, a slow implementation of 'select' would have a negative impact - /// on the overall performance of the tool. 
- uint64_t RemovedFromNextInSequence; - -public: - DefaultResourceStrategy(uint64_t UnitMask) - : ResourceStrategy(), ResourceUnitMask(UnitMask), - NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {} - virtual ~DefaultResourceStrategy() = default; - - uint64_t select(uint64_t ReadyMask) override; - void used(uint64_t Mask) override; -}; - -/// A processor resource descriptor. -/// -/// There is an instance of this class for every processor resource defined by -/// the machine scheduling model. -/// Objects of class ResourceState dynamically track the usage of processor -/// resource units. -class ResourceState { - /// An index to the MCProcResourceDesc entry in the processor model. - const unsigned ProcResourceDescIndex; - /// A resource mask. This is generated by the tool with the help of - /// function `mca::createProcResourceMasks' (see Support.h). - const uint64_t ResourceMask; - - /// A ProcResource can have multiple units. - /// - /// For processor resource groups, - /// this field default to the value of field `ResourceMask`; the number of - /// bits set is equal to the cardinality of the group. For normal (i.e. - /// non-group) resources, the number of bits set in this mask is equivalent - /// to the number of units declared by the processor model (see field - /// 'NumUnits' in 'ProcResourceUnits'). - uint64_t ResourceSizeMask; - - /// A mask of ready units. - uint64_t ReadyMask; - - /// Buffered resources will have this field set to a positive number different - /// than zero. A buffered resource behaves like a reservation station - /// implementing its own buffer for out-of-order execution. - /// - /// A BufferSize of 1 is used by scheduler resources that force in-order - /// execution. - /// - /// A BufferSize of 0 is used to model in-order issue/dispatch resources. - /// Since in-order issue/dispatch resources don't implement buffers, dispatch - /// events coincide with issue events. 
- /// Also, no other instruction ca be dispatched/issue while this resource is - /// in use. Only when all the "resource cycles" are consumed (after the issue - /// event), a new instruction ca be dispatched. - const int BufferSize; - - /// Available slots in the buffer (zero, if this is not a buffered resource). - unsigned AvailableSlots; - - /// This field is set if this resource is currently reserved. - /// - /// Resources can be reserved for a number of cycles. - /// Instructions can still be dispatched to reserved resources. However, - /// istructions dispatched to a reserved resource cannot be issued to the - /// underlying units (i.e. pipelines) until the resource is released. - bool Unavailable; - - const bool IsAGroup; - - /// Checks for the availability of unit 'SubResMask' in the group. - bool isSubResourceReady(uint64_t SubResMask) const { - return ReadyMask & SubResMask; - } - -public: - ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask); - - unsigned getProcResourceID() const { return ProcResourceDescIndex; } - uint64_t getResourceMask() const { return ResourceMask; } - uint64_t getReadyMask() const { return ReadyMask; } - int getBufferSize() const { return BufferSize; } - - bool isBuffered() const { return BufferSize > 0; } - bool isInOrder() const { return BufferSize == 1; } - - /// Returns true if this is an in-order dispatch/issue resource. - bool isADispatchHazard() const { return BufferSize == 0; } - bool isReserved() const { return Unavailable; } - - void setReserved() { Unavailable = true; } - void clearReserved() { Unavailable = false; } - - /// Returs true if this resource is not reserved, and if there are at least - /// `NumUnits` available units. 
- bool isReady(unsigned NumUnits = 1) const; - - bool isAResourceGroup() const { return IsAGroup; } - - bool containsResource(uint64_t ID) const { return ResourceMask & ID; } - - void markSubResourceAsUsed(uint64_t ID) { - assert(isSubResourceReady(ID)); - ReadyMask ^= ID; - } - - void releaseSubResource(uint64_t ID) { - assert(!isSubResourceReady(ID)); - ReadyMask ^= ID; - } - - unsigned getNumUnits() const { - return isAResourceGroup() ? 1U : countPopulation(ResourceSizeMask); - } - - /// Checks if there is an available slot in the resource buffer. - /// - /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if - /// there is a slot available. - /// - /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it - /// is reserved. - /// - /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. - ResourceStateEvent isBufferAvailable() const; - - /// Reserve a slot in the buffer. - void reserveBuffer() { - if (AvailableSlots) - AvailableSlots--; - } - - /// Release a slot in the buffer. - void releaseBuffer() { - if (BufferSize > 0) - AvailableSlots++; - assert(AvailableSlots <= static_cast(BufferSize)); - } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -/// A resource unit identifier. -/// -/// This is used to identify a specific processor resource unit using a pair -/// of indices where the 'first' index is a processor resource mask, and the -/// 'second' index is an index for a "sub-resource" (i.e. unit). -typedef std::pair ResourceRef; - -// First: a MCProcResourceDesc index identifying a buffered resource. -// Second: max number of buffer entries used in this resource. -typedef std::pair BufferUsageEntry; - -/// A resource manager for processor resource units and groups. -/// -/// This class owns all the ResourceState objects, and it is responsible for -/// acting on requests from a Scheduler by updating the internal state of -/// ResourceState objects. 
-/// This class doesn't know about instruction itineraries and functional units. -/// In future, it can be extended to support itineraries too through the same -/// public interface. -class ResourceManager { - // The resource manager owns all the ResourceState. - std::vector> Resources; - std::vector> Strategies; - - // Keeps track of which resources are busy, and how many cycles are left - // before those become usable again. - SmallDenseMap BusyResources; - - // A table to map processor resource IDs to processor resource masks. - SmallVector ProcResID2Mask; - - // Returns the actual resource unit that will be used. - ResourceRef selectPipe(uint64_t ResourceID); - - void use(const ResourceRef &RR); - void release(const ResourceRef &RR); - - unsigned getNumUnits(uint64_t ResourceID) const; - - // Overrides the selection strategy for the processor resource with the given - // mask. - void setCustomStrategyImpl(std::unique_ptr S, - uint64_t ResourceMask); - -public: - ResourceManager(const MCSchedModel &SM); - virtual ~ResourceManager() = default; - - // Overrides the selection strategy for the resource at index ResourceID in - // the MCProcResourceDesc table. - void setCustomStrategy(std::unique_ptr S, - unsigned ResourceID) { - assert(ResourceID < ProcResID2Mask.size() && - "Invalid resource index in input!"); - return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); - } - - // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if - // there are enough available slots in the buffers. - ResourceStateEvent canBeDispatched(ArrayRef Buffers) const; - - // Return the processor resource identifier associated to this Mask. - unsigned resolveResourceMask(uint64_t Mask) const; - - // Consume a slot in every buffered resource from array 'Buffers'. Resource - // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. 
- void reserveBuffers(ArrayRef Buffers); - - // Release buffer entries previously allocated by method reserveBuffers. - void releaseBuffers(ArrayRef Buffers); - - // Reserve a processor resource. A reserved resource is not available for - // instruction issue until it is released. - void reserveResource(uint64_t ResourceID); - - // Release a previously reserved processor resource. - void releaseResource(uint64_t ResourceID); - - // Returns true if all resources are in-order, and there is at least one - // resource which is a dispatch hazard (BufferSize = 0). - bool mustIssueImmediately(const InstrDesc &Desc) const; - - bool canBeIssued(const InstrDesc &Desc) const; - - void issueInstruction( - const InstrDesc &Desc, - SmallVectorImpl> &Pipes); - - void cycleEvent(SmallVectorImpl &ResourcesFreed); - -#ifndef NDEBUG - void dump() const { - for (const std::unique_ptr &Resource : Resources) - Resource->dump(); - } -#endif -}; -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h @@ -1,104 +0,0 @@ -//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file simulates the hardware responsible for retiring instructions. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H -#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H - -#include "HardwareUnits/HardwareUnit.h" -#include "Instruction.h" -#include "llvm/MC/MCSchedule.h" -#include - -namespace llvm { -namespace mca { - -/// This class tracks which instructions are in-flight (i.e., dispatched but not -/// retired) in the OoO backend. -// -/// This class checks on every cycle if/which instructions can be retired. -/// Instructions are retired in program order. -/// In the event of an instruction being retired, the pipeline that owns -/// this RetireControlUnit (RCU) gets notified. -/// -/// On instruction retired, register updates are all architecturally -/// committed, and any physicall registers previously allocated for the -/// retired instruction are freed. -struct RetireControlUnit : public HardwareUnit { - // A RUToken is created by the RCU for every instruction dispatched to the - // schedulers. These "tokens" are managed by the RCU in its token Queue. - // - // On every cycle ('cycleEvent'), the RCU iterates through the token queue - // looking for any token with its 'Executed' flag set. If a token has that - // flag set, then the instruction has reached the write-back stage and will - // be retired by the RCU. - // - // 'NumSlots' represents the number of entries consumed by the instruction in - // the reorder buffer. Those entries will become available again once the - // instruction is retired. - // - // Note that the size of the reorder buffer is defined by the scheduling - // model via field 'NumMicroOpBufferSize'. - struct RUToken { - InstRef IR; - unsigned NumSlots; // Slots reserved to this instruction. - bool Executed; // True if the instruction is past the WB stage. - }; - -private: - unsigned NextAvailableSlotIdx; - unsigned CurrentInstructionSlotIdx; - unsigned AvailableSlots; - unsigned MaxRetirePerCycle; // 0 means no limit. 
- std::vector Queue; - -public: - RetireControlUnit(const MCSchedModel &SM); - - bool isEmpty() const { return AvailableSlots == Queue.size(); } - bool isAvailable(unsigned Quantity = 1) const { - // Some instructions may declare a number of uOps which exceeds the size - // of the reorder buffer. To avoid problems, cap the amount of slots to - // the size of the reorder buffer. - Quantity = std::min(Quantity, static_cast(Queue.size())); - - // Further normalize the number of micro opcodes for instructions that - // declare zero opcodes. This should match the behavior of method - // reserveSlot(). - Quantity = std::max(Quantity, 1U); - return AvailableSlots >= Quantity; - } - - unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } - - // Reserves a number of slots, and returns a new token. - unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); - - // Return the current token from the RCU's circular token queue. - const RUToken &peekCurrentToken() const; - - // Advance the pointer to the next token in the circular token queue. - void consumeCurrentToken(); - - // Update the RCU token to represent the executed state. - void onInstructionExecuted(unsigned TokenID); - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h @@ -1,214 +0,0 @@ -//===--------------------- Scheduler.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A scheduler for Processor Resource Units and Processor Resource Groups. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H -#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H - -#include "HardwareUnits/HardwareUnit.h" -#include "HardwareUnits/LSUnit.h" -#include "ResourceManager.h" -#include "Support.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace llvm { -namespace mca { - -class SchedulerStrategy { -public: - SchedulerStrategy() = default; - virtual ~SchedulerStrategy(); - - /// Returns true if Lhs should take priority over Rhs. - /// - /// This method is used by class Scheduler to select the "best" ready - /// instruction to issue to the underlying pipelines. - virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0; -}; - -/// Default instruction selection strategy used by class Scheduler. -class DefaultSchedulerStrategy : public SchedulerStrategy { - /// This method ranks instructions based on their age, and the number of known - /// users. The lower the rank value, the better. - int computeRank(const InstRef &Lhs) const { - return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers(); - } - -public: - DefaultSchedulerStrategy() = default; - virtual ~DefaultSchedulerStrategy(); - - bool compare(const InstRef &Lhs, const InstRef &Rhs) const override { - int LhsRank = computeRank(Lhs); - int RhsRank = computeRank(Rhs); - - /// Prioritize older instructions over younger instructions to minimize the - /// pressure on the reorder buffer. - if (LhsRank == RhsRank) - return Lhs.getSourceIndex() < Rhs.getSourceIndex(); - return LhsRank < RhsRank; - } -}; - -/// Class Scheduler is responsible for issuing instructions to pipeline -/// resources. -/// -/// Internally, it delegates to a ResourceManager the management of processor -/// resources. 
This class is also responsible for tracking the progress of -/// instructions from the dispatch stage, until the write-back stage. -/// -/// An instruction dispatched to the Scheduler is initially placed into either -/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input -/// operands. -/// -/// An instruction is moved from the WaitSet to the ReadySet when register -/// operands become available, and all memory dependencies are met. -/// Instructions that are moved from the WaitSet to the ReadySet transition -/// in state from 'IS_AVAILABLE' to 'IS_READY'. -/// -/// On every cycle, the Scheduler checks if it can promote instructions from the -/// WaitSet to the ReadySet. -/// -/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued -/// to a (one or more) pipeline(s). This event also causes an instruction state -/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction -/// leaves the IssuedSet when it reaches the write-back stage. -class Scheduler : public HardwareUnit { - LSUnit &LSU; - - // Instruction selection strategy for this Scheduler. - std::unique_ptr Strategy; - - // Hardware resources that are managed by this scheduler. - std::unique_ptr Resources; - - std::vector WaitSet; - std::vector ReadySet; - std::vector IssuedSet; - - /// Verify the given selection strategy and set the Strategy member - /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is - /// used. - void initializeStrategy(std::unique_ptr S); - - /// Issue an instruction without updating the ready queue. - void issueInstructionImpl( - InstRef &IR, - SmallVectorImpl> &Pipes); - - // Identify instructions that have finished executing, and remove them from - // the IssuedSet. References to executed instructions are added to input - // vector 'Executed'. - void updateIssuedSet(SmallVectorImpl &Executed); - - // Try to promote instructions from WaitSet to ReadySet. 
- // Add promoted instructions to the 'Ready' vector in input. - void promoteToReadySet(SmallVectorImpl &Ready); - -public: - Scheduler(const MCSchedModel &Model, LSUnit &Lsu) - : Scheduler(Model, Lsu, nullptr) {} - - Scheduler(const MCSchedModel &Model, LSUnit &Lsu, - std::unique_ptr SelectStrategy) - : Scheduler(make_unique(Model), Lsu, - std::move(SelectStrategy)) {} - - Scheduler(std::unique_ptr RM, LSUnit &Lsu, - std::unique_ptr SelectStrategy) - : LSU(Lsu), Resources(std::move(RM)) { - initializeStrategy(std::move(SelectStrategy)); - } - - // Stalls generated by the scheduler. - enum Status { - SC_AVAILABLE, - SC_LOAD_QUEUE_FULL, - SC_STORE_QUEUE_FULL, - SC_BUFFERS_FULL, - SC_DISPATCH_GROUP_STALL, - }; - - /// Check if the instruction in 'IR' can be dispatched and returns an answer - /// in the form of a Status value. - /// - /// The DispatchStage is responsible for querying the Scheduler before - /// dispatching new instructions. This routine is used for performing such - /// a query. If the instruction 'IR' can be dispatched, then true is - /// returned, otherwise false is returned with Event set to the stall type. - /// Internally, it also checks if the load/store unit is available. - Status isAvailable(const InstRef &IR) const; - - /// Reserves buffer and LSUnit queue resources that are necessary to issue - /// this instruction. - /// - /// Returns true if instruction IR is ready to be issued to the underlying - /// pipelines. Note that this operation cannot fail; it assumes that a - /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`. - void dispatch(const InstRef &IR); - - /// Returns true if IR is ready to be executed by the underlying pipelines. - /// This method assumes that IR has been previously dispatched. - bool isReady(const InstRef &IR) const; - - /// Issue an instruction and populates a vector of used pipeline resources, - /// and a vector of instructions that transitioned to the ready state as a - /// result of this event. 
- void issueInstruction( - InstRef &IR, - SmallVectorImpl> &Used, - SmallVectorImpl &Ready); - - /// Returns true if IR has to be issued immediately, or if IR is a zero - /// latency instruction. - bool mustIssueImmediately(const InstRef &IR) const; - - /// This routine notifies the Scheduler that a new cycle just started. - /// - /// It notifies the underlying ResourceManager that a new cycle just started. - /// Vector `Freed` is populated with resourceRef related to resources that - /// have changed in state, and that are now available to new instructions. - /// Instructions executed are added to vector Executed, while vector Ready is - /// populated with instructions that have become ready in this new cycle. - void cycleEvent(SmallVectorImpl &Freed, - SmallVectorImpl &Ready, - SmallVectorImpl &Executed); - - /// Convert a resource mask into a valid llvm processor resource identifier. - unsigned getResourceID(uint64_t Mask) const { - return Resources->resolveResourceMask(Mask); - } - - /// Select the next instruction to issue from the ReadySet. Returns an invalid - /// instruction reference if there are no ready instructions, or if processor - /// resources are not available. - InstRef select(); - -#ifndef NDEBUG - // Update the ready queues. - void dump() const; - - // This routine performs a sanity check. This routine should only be called - // when we know that 'IR' is not in the scheduler's instruction queues. 
- void sanityCheck(const InstRef &IR) const { - assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!"); - assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!"); - assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!"); - } -#endif // !NDEBUG -}; -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H Index: llvm/trunk/tools/llvm-mca/include/InstrBuilder.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/InstrBuilder.h +++ llvm/trunk/tools/llvm-mca/include/InstrBuilder.h @@ -1,77 +0,0 @@ -//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A builder class for instructions that are statically analyzed by llvm-mca. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H -#define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H - -#include "Instruction.h" -#include "Support.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace mca { - -/// A builder class that knows how to construct Instruction objects. -/// -/// Every llvm-mca Instruction is described by an object of class InstrDesc. -/// An InstrDesc describes which registers are read/written by the instruction, -/// as well as the instruction latency and hardware resources consumed. -/// -/// This class is used by the tool to construct Instructions and instruction -/// descriptors (i.e. InstrDesc objects). 
-/// Information from the machine scheduling model is used to identify processor -/// resources that are consumed by an instruction. -class InstrBuilder { - const MCSubtargetInfo &STI; - const MCInstrInfo &MCII; - const MCRegisterInfo &MRI; - const MCInstrAnalysis &MCIA; - SmallVector ProcResourceMasks; - - DenseMap> Descriptors; - DenseMap> VariantDescriptors; - - bool FirstCallInst; - bool FirstReturnInst; - - Expected createInstrDescImpl(const MCInst &MCI); - Expected getOrCreateInstrDesc(const MCInst &MCI); - - InstrBuilder(const InstrBuilder &) = delete; - InstrBuilder &operator=(const InstrBuilder &) = delete; - - void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID); - void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID); - Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const; - -public: - InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII, - const MCRegisterInfo &RI, const MCInstrAnalysis &IA); - - void clear() { - VariantDescriptors.shrink_and_clear(); - FirstCallInst = true; - FirstReturnInst = true; - } - - Expected> createInstruction(const MCInst &MCI); -}; -} // namespace mca -} // namespace llvm - -#endif Index: llvm/trunk/tools/llvm-mca/include/Instruction.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Instruction.h +++ llvm/trunk/tools/llvm-mca/include/Instruction.h @@ -1,542 +0,0 @@ -//===--------------------- Instruction.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines abstractions used by the Pipeline to model register reads, -/// register writes and instructions. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H -#define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MathExtras.h" - -#ifndef NDEBUG -#include "llvm/Support/raw_ostream.h" -#endif - -#include - -namespace llvm { -namespace mca { - -constexpr int UNKNOWN_CYCLES = -512; - -/// A register write descriptor. -struct WriteDescriptor { - // Operand index. The index is negative for implicit writes only. - // For implicit writes, the actual operand index is computed performing - // a bitwise not of the OpIndex. - int OpIndex; - // Write latency. Number of cycles before write-back stage. - unsigned Latency; - // This field is set to a value different than zero only if this - // is an implicit definition. - unsigned RegisterID; - // Instruction itineraries would set this field to the SchedClass ID. - // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry - // element associated to this write. - // When computing read latencies, this value is matched against the - // "ReadAdvance" information. The hardware backend may implement - // dedicated forwarding paths to quickly propagate write results to dependent - // instructions waiting in the reservation station (effectively bypassing the - // write-back stage). - unsigned SClassOrWriteResourceID; - // True only if this is a write obtained from an optional definition. - // Optional definitions are allowed to reference regID zero (i.e. "no - // register"). - bool IsOptionalDef; - - bool isImplicitWrite() const { return OpIndex < 0; }; -}; - -/// A register read descriptor. -struct ReadDescriptor { - // A MCOperand index. This is used by the Dispatch logic to identify register - // reads. Implicit reads have negative indices. The actual operand index of an - // implicit read is the bitwise not of field OpIndex. 
- int OpIndex; - // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit - // uses always come first in the sequence of uses. - unsigned UseIndex; - // This field is only set if this is an implicit read. - unsigned RegisterID; - // Scheduling Class Index. It is used to query the scheduling model for the - // MCSchedClassDesc object. - unsigned SchedClassID; - - bool isImplicitRead() const { return OpIndex < 0; }; -}; - -class ReadState; - -/// Tracks uses of a register definition (e.g. register write). -/// -/// Each implicit/explicit register write is associated with an instance of -/// this class. A WriteState object tracks the dependent users of a -/// register write. It also tracks how many cycles are left before the write -/// back stage. -class WriteState { - const WriteDescriptor *WD; - // On instruction issue, this field is set equal to the write latency. - // Before instruction issue, this field defaults to -512, a special - // value that represents an "unknown" number of cycles. - int CyclesLeft; - - // Actual register defined by this write. This field is only used - // to speedup queries on the register file. - // For implicit writes, this field always matches the value of - // field RegisterID from WD. - unsigned RegisterID; - - // Physical register file that serves register RegisterID. - unsigned PRFID; - - // True if this write implicitly clears the upper portion of RegisterID's - // super-registers. - bool ClearsSuperRegs; - - // True if this write is from a dependency breaking zero-idiom instruction. - bool WritesZero; - - // True if this write has been eliminated at register renaming stage. - // Example: a register move doesn't consume scheduler/pipleline resources if - // it is eliminated at register renaming stage. It still consumes - // decode bandwidth, and ROB entries. 
- bool IsEliminated; - - // This field is set if this is a partial register write, and it has a false - // dependency on any previous write of the same register (or a portion of it). - // DependentWrite must be able to complete before this write completes, so - // that we don't break the WAW, and the two writes can be merged together. - const WriteState *DependentWrite; - - // A partial write that is in a false dependency with this write. - WriteState *PartialWrite; - - unsigned DependentWriteCyclesLeft; - - // A list of dependent reads. Users is a set of dependent - // reads. A dependent read is added to the set only if CyclesLeft - // is "unknown". As soon as CyclesLeft is 'known', each user in the set - // gets notified with the actual CyclesLeft. - - // The 'second' element of a pair is a "ReadAdvance" number of cycles. - SmallVector, 4> Users; - -public: - WriteState(const WriteDescriptor &Desc, unsigned RegID, - bool clearsSuperRegs = false, bool writesZero = false) - : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), - PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), - IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr), - DependentWriteCyclesLeft(0) {} - - WriteState(const WriteState &Other) = default; - WriteState &operator=(const WriteState &Other) = default; - - int getCyclesLeft() const { return CyclesLeft; } - unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; } - unsigned getRegisterID() const { return RegisterID; } - unsigned getRegisterFileID() const { return PRFID; } - unsigned getLatency() const { return WD->Latency; } - - void addUser(ReadState *Use, int ReadAdvance); - void addUser(WriteState *Use); - - unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; } - - unsigned getNumUsers() const { - unsigned NumUsers = Users.size(); - if (PartialWrite) - ++NumUsers; - return NumUsers; - } - - bool clearsSuperRegisters() const { return ClearsSuperRegs; } - 
bool isWriteZero() const { return WritesZero; } - bool isEliminated() const { return IsEliminated; } - bool isExecuted() const { - return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0; - } - - const WriteState *getDependentWrite() const { return DependentWrite; } - void setDependentWrite(WriteState *Other) { DependentWrite = Other; } - void writeStartEvent(unsigned Cycles) { - DependentWriteCyclesLeft = Cycles; - DependentWrite = nullptr; - } - - void setWriteZero() { WritesZero = true; } - void setEliminated() { - assert(Users.empty() && "Write is in an inconsistent state."); - CyclesLeft = 0; - IsEliminated = true; - } - - void setPRF(unsigned PRF) { PRFID = PRF; } - - // On every cycle, update CyclesLeft and notify dependent users. - void cycleEvent(); - void onInstructionIssued(); - -#ifndef NDEBUG - void dump() const; -#endif -}; - -/// Tracks register operand latency in cycles. -/// -/// A read may be dependent on more than one write. This occurs when some -/// writes only partially update the register associated to this read. -class ReadState { - const ReadDescriptor *RD; - // Physical register identified associated to this read. - unsigned RegisterID; - // Physical register file that serves register RegisterID. - unsigned PRFID; - // Number of writes that contribute to the definition of RegisterID. - // In the absence of partial register updates, the number of DependentWrites - // cannot be more than one. - unsigned DependentWrites; - // Number of cycles left before RegisterID can be read. This value depends on - // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES. - // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of - // every dependent write is known. - int CyclesLeft; - // This field is updated on every writeStartEvent(). When the number of - // dependent writes (i.e. field DependentWrite) is zero, this value is - // propagated to field CyclesLeft. 
- unsigned TotalCycles; - // This field is set to true only if there are no dependent writes, and - // there are no `CyclesLeft' to wait. - bool IsReady; - // True if this is a read from a known zero register. - bool IsZero; - // True if this register read is from a dependency-breaking instruction. - bool IndependentFromDef; - -public: - ReadState(const ReadDescriptor &Desc, unsigned RegID) - : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0), - CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true), - IsZero(false), IndependentFromDef(false) {} - - const ReadDescriptor &getDescriptor() const { return *RD; } - unsigned getSchedClass() const { return RD->SchedClassID; } - unsigned getRegisterID() const { return RegisterID; } - unsigned getRegisterFileID() const { return PRFID; } - - bool isReady() const { return IsReady; } - bool isImplicitRead() const { return RD->isImplicitRead(); } - - bool isIndependentFromDef() const { return IndependentFromDef; } - void setIndependentFromDef() { IndependentFromDef = true; } - - void cycleEvent(); - void writeStartEvent(unsigned Cycles); - void setDependentWrites(unsigned Writes) { - DependentWrites = Writes; - IsReady = !Writes; - } - - bool isReadZero() const { return IsZero; } - void setReadZero() { IsZero = true; } - void setPRF(unsigned ID) { PRFID = ID; } -}; - -/// A sequence of cycles. -/// -/// This class can be used as a building block to construct ranges of cycles. -class CycleSegment { - unsigned Begin; // Inclusive. - unsigned End; // Exclusive. - bool Reserved; // Resources associated to this segment must be reserved. 
- -public: - CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false) - : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {} - - bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; } - bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; } - bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; } - bool overlaps(const CycleSegment &CS) const { - return !startsAfter(CS) && !endsBefore(CS); - } - bool isExecuting() const { return Begin == 0 && End != 0; } - bool isExecuted() const { return End == 0; } - bool operator<(const CycleSegment &Other) const { - return Begin < Other.Begin; - } - CycleSegment &operator--(void) { - if (Begin) - Begin--; - if (End) - End--; - return *this; - } - - bool isValid() const { return Begin <= End; } - unsigned size() const { return End - Begin; }; - void subtract(unsigned Cycles) { - assert(End >= Cycles); - End -= Cycles; - } - - unsigned begin() const { return Begin; } - unsigned end() const { return End; } - void setEnd(unsigned NewEnd) { End = NewEnd; } - bool isReserved() const { return Reserved; } - void setReserved() { Reserved = true; } -}; - -/// Helper used by class InstrDesc to describe how hardware resources -/// are used. -/// -/// This class describes how many resource units of a specific resource kind -/// (and how many cycles) are "used" by an instruction. -struct ResourceUsage { - CycleSegment CS; - unsigned NumUnits; - ResourceUsage(CycleSegment Cycles, unsigned Units = 1) - : CS(Cycles), NumUnits(Units) {} - unsigned size() const { return CS.size(); } - bool isReserved() const { return CS.isReserved(); } - void setReserved() { CS.setReserved(); } -}; - -/// An instruction descriptor -struct InstrDesc { - SmallVector Writes; // Implicit writes are at the end. - SmallVector Reads; // Implicit reads are at the end. 
- - // For every resource used by an instruction of this kind, this vector - // reports the number of "consumed cycles". - SmallVector, 4> Resources; - - // A list of buffered resources consumed by this instruction. - SmallVector Buffers; - - unsigned MaxLatency; - // Number of MicroOps for this instruction. - unsigned NumMicroOps; - - bool MayLoad; - bool MayStore; - bool HasSideEffects; - - // A zero latency instruction doesn't consume any scheduler resources. - bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } - - InstrDesc() = default; - InstrDesc(const InstrDesc &Other) = delete; - InstrDesc &operator=(const InstrDesc &Other) = delete; -}; - -/// Base class for instructions consumed by the simulation pipeline. -/// -/// This class tracks data dependencies as well as generic properties -/// of the instruction. -class InstructionBase { - const InstrDesc &Desc; - - // This field is set for instructions that are candidates for move - // elimination. For more information about move elimination, see the - // definition of RegisterMappingTracker in RegisterFile.h - bool IsOptimizableMove; - - // Output dependencies. - // One entry per each implicit and explicit register definition. - SmallVector Defs; - - // Input dependencies. - // One entry per each implicit and explicit register use. 
- SmallVector Uses; - -public: - InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {} - - SmallVectorImpl &getDefs() { return Defs; } - const ArrayRef getDefs() const { return Defs; } - SmallVectorImpl &getUses() { return Uses; } - const ArrayRef getUses() const { return Uses; } - const InstrDesc &getDesc() const { return Desc; } - - unsigned getLatency() const { return Desc.MaxLatency; } - - bool hasDependentUsers() const { - return any_of(Defs, - [](const WriteState &Def) { return Def.getNumUsers() > 0; }); - } - - unsigned getNumUsers() const { - unsigned NumUsers = 0; - for (const WriteState &Def : Defs) - NumUsers += Def.getNumUsers(); - return NumUsers; - } - - // Returns true if this instruction is a candidate for move elimination. - bool isOptimizableMove() const { return IsOptimizableMove; } - void setOptimizableMove() { IsOptimizableMove = true; } -}; - -/// An instruction propagated through the simulated instruction pipeline. -/// -/// This class is used to monitor changes to the internal state of instructions -/// that are sent to the various components of the simulated hardware pipeline. -class Instruction : public InstructionBase { - enum InstrStage { - IS_INVALID, // Instruction in an invalid state. - IS_AVAILABLE, // Instruction dispatched but operands are not ready. - IS_READY, // Instruction dispatched and operands ready. - IS_EXECUTING, // Instruction issued. - IS_EXECUTED, // Instruction executed. Values are written back. - IS_RETIRED // Instruction retired. - }; - - // The current instruction stage. - enum InstrStage Stage; - - // This value defaults to the instruction latency. This instruction is - // considered executed when field CyclesLeft goes to zero. - int CyclesLeft; - - // Retire Unit token ID for this instruction. 
- unsigned RCUTokenID; - -public: - Instruction(const InstrDesc &D) - : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), - RCUTokenID(0) {} - - unsigned getRCUTokenID() const { return RCUTokenID; } - int getCyclesLeft() const { return CyclesLeft; } - - // Transition to the dispatch stage, and assign a RCUToken to this - // instruction. The RCUToken is used to track the completion of every - // register write performed by this instruction. - void dispatch(unsigned RCUTokenID); - - // Instruction issued. Transition to the IS_EXECUTING state, and update - // all the definitions. - void execute(); - - // Force a transition from the IS_AVAILABLE state to the IS_READY state if - // input operands are all ready. State transitions normally occur at the - // beginning of a new cycle (see method cycleEvent()). However, the scheduler - // may decide to promote instructions from the wait queue to the ready queue - // as the result of another issue event. This method is called every time the - // instruction might have changed in state. - void update(); - - bool isDispatched() const { return Stage == IS_AVAILABLE; } - bool isReady() const { return Stage == IS_READY; } - bool isExecuting() const { return Stage == IS_EXECUTING; } - bool isExecuted() const { return Stage == IS_EXECUTED; } - bool isRetired() const { return Stage == IS_RETIRED; } - - bool isEliminated() const { - return isReady() && getDefs().size() && - all_of(getDefs(), - [](const WriteState &W) { return W.isEliminated(); }); - } - - // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED. - void forceExecuted(); - - void retire() { - assert(isExecuted() && "Instruction is in an invalid state!"); - Stage = IS_RETIRED; - } - - void cycleEvent(); -}; - -/// An InstRef contains both a SourceMgr index and Instruction pair. The index -/// is used as a unique identifier for the instruction. MCA will make use of -/// this index as a key throughout MCA. 
-class InstRef { - std::pair Data; - -public: - InstRef() : Data(std::make_pair(0, nullptr)) {} - InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {} - - bool operator==(const InstRef &Other) const { return Data == Other.Data; } - - unsigned getSourceIndex() const { return Data.first; } - Instruction *getInstruction() { return Data.second; } - const Instruction *getInstruction() const { return Data.second; } - - /// Returns true if this references a valid instruction. - operator bool() const { return Data.second != nullptr; } - - /// Invalidate this reference. - void invalidate() { Data.second = nullptr; } - -#ifndef NDEBUG - void print(raw_ostream &OS) const { OS << getSourceIndex(); } -#endif -}; - -#ifndef NDEBUG -inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) { - IR.print(OS); - return OS; -} -#endif - -/// A reference to a register write. -/// -/// This class is mainly used by the register file to describe register -/// mappings. It correlates a register write to the source index of the -/// defining instruction. -class WriteRef { - std::pair Data; - static const unsigned INVALID_IID; - -public: - WriteRef() : Data(INVALID_IID, nullptr) {} - WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {} - - unsigned getSourceIndex() const { return Data.first; } - const WriteState *getWriteState() const { return Data.second; } - WriteState *getWriteState() { return Data.second; } - void invalidate() { Data.second = nullptr; } - bool isWriteZero() const { - assert(isValid() && "Invalid null WriteState found!"); - return getWriteState()->isWriteZero(); - } - - /// Returns true if this register write has been executed, and the new - /// register value is therefore available to users. 
- bool isAvailable() const { - if (getSourceIndex() == INVALID_IID) - return false; - const WriteState *WS = getWriteState(); - return !WS || WS->isExecuted(); - } - - bool isValid() const { return Data.first != INVALID_IID && Data.second; } - bool operator==(const WriteRef &Other) const { return Data == Other.Data; } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca -} // namespace llvm - -#endif Index: llvm/trunk/tools/llvm-mca/include/Pipeline.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Pipeline.h +++ llvm/trunk/tools/llvm-mca/include/Pipeline.h @@ -1,79 +0,0 @@ -//===--------------------- Pipeline.h ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements an ordered container of stages that simulate the -/// pipeline of a hardware backend. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINE_H -#define LLVM_TOOLS_LLVM_MCA_PIPELINE_H - -#include "HardwareUnits/Scheduler.h" -#include "Stages/Stage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace mca { - -class HWEventListener; - -/// A pipeline for a specific subtarget. -/// -/// It emulates an out-of-order execution of instructions. Instructions are -/// fetched from a MCInst sequence managed by an initial 'Fetch' stage. -/// Instructions are firstly fetched, then dispatched to the schedulers, and -/// then executed. 
-/// -/// This class tracks the lifetime of an instruction from the moment where -/// it gets dispatched to the schedulers, to the moment where it finishes -/// executing and register writes are architecturally committed. -/// In particular, it monitors changes in the state of every instruction -/// in flight. -/// -/// Instructions are executed in a loop of iterations. The number of iterations -/// is defined by the SourceMgr object, which is managed by the initial stage -/// of the instruction pipeline. -/// -/// The Pipeline entry point is method 'run()' which executes cycles in a loop -/// until there are new instructions to dispatch, and not every instruction -/// has been retired. -/// -/// Internally, the Pipeline collects statistical information in the form of -/// histograms. For example, it tracks how the dispatch group size changes -/// over time. -class Pipeline { - Pipeline(const Pipeline &P) = delete; - Pipeline &operator=(const Pipeline &P) = delete; - - /// An ordered list of stages that define this instruction pipeline. - SmallVector, 8> Stages; - std::set Listeners; - unsigned Cycles; - - Error runCycle(); - bool hasWorkToProcess(); - void notifyCycleBegin(); - void notifyCycleEnd(); - -public: - Pipeline() : Cycles(0) {} - void appendStage(std::unique_ptr S); - - /// Returns the total number of simulated cycles. - Expected run(); - - void addEventListener(HWEventListener *Listener); -}; -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_PIPELINE_H Index: llvm/trunk/tools/llvm-mca/include/SourceMgr.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/SourceMgr.h +++ llvm/trunk/tools/llvm-mca/include/SourceMgr.h @@ -1,57 +0,0 @@ -//===--------------------- SourceMgr.h --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements class SourceMgr. Class SourceMgr abstracts the input -/// code sequence (a sequence of MCInst), and assings unique identifiers to -/// every instruction in the sequence. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H -#define LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H - -#include "llvm/ADT/ArrayRef.h" - -namespace llvm { -namespace mca { - -class Instruction; - -typedef std::pair SourceRef; - -class SourceMgr { - using UniqueInst = std::unique_ptr; - ArrayRef Sequence; - unsigned Current; - const unsigned Iterations; - static const unsigned DefaultIterations = 100; - -public: - SourceMgr(ArrayRef S, unsigned Iter) - : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {} - - unsigned getNumIterations() const { return Iterations; } - unsigned size() const { return Sequence.size(); } - bool hasNext() const { return Current < (Iterations * Sequence.size()); } - void updateNext() { ++Current; } - - SourceRef peekNext() const { - assert(hasNext() && "Already at end of sequence!"); - return SourceRef(Current, *Sequence[Current % Sequence.size()]); - } - - using const_iterator = ArrayRef::const_iterator; - const_iterator begin() const { return Sequence.begin(); } - const_iterator end() const { return Sequence.end(); } -}; - -} // namespace mca -} // namespace llvm - -#endif Index: llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h @@ -1,93 +0,0 @@ -//===----------------------- DispatchStage.h --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file models the dispatch component of an instruction pipeline. -/// -/// The DispatchStage is responsible for updating instruction dependencies -/// and communicating to the simulated instruction scheduler that an instruction -/// is ready to be scheduled for execution. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H - -#include "HWEventListener.h" -#include "HardwareUnits/RegisterFile.h" -#include "HardwareUnits/RetireControlUnit.h" -#include "Instruction.h" -#include "Stages/Stage.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { -namespace mca { - -// Implements the hardware dispatch logic. -// -// This class is responsible for the dispatch stage, in which instructions are -// dispatched in groups to the Scheduler. An instruction can be dispatched if -// the following conditions are met: -// 1) There are enough entries in the reorder buffer (see class -// RetireControlUnit) to write the opcodes associated with the instruction. -// 2) There are enough physical registers to rename output register operands. -// 3) There are enough entries available in the used buffered resource(s). -// -// The number of micro opcodes that can be dispatched in one cycle is limited by -// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when -// processor resources are not available. Dispatch stall events are counted -// during the entire execution of the code, and displayed by the performance -// report when flag '-dispatch-stats' is specified. -// -// If the number of micro opcodes exceedes DispatchWidth, then the instruction -// is dispatched in multiple cycles. 
-class DispatchStage final : public Stage { - unsigned DispatchWidth; - unsigned AvailableEntries; - unsigned CarryOver; - InstRef CarriedOver; - const MCSubtargetInfo &STI; - RetireControlUnit &RCU; - RegisterFile &PRF; - - bool checkRCU(const InstRef &IR) const; - bool checkPRF(const InstRef &IR) const; - bool canDispatch(const InstRef &IR) const; - Error dispatch(InstRef IR); - - void updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI); - - void notifyInstructionDispatched(const InstRef &IR, - ArrayRef UsedPhysRegs, - unsigned uOps) const; - -public: - DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI, - unsigned MaxDispatchWidth, RetireControlUnit &R, - RegisterFile &F) - : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {} - - bool isAvailable(const InstRef &IR) const override; - - // The dispatch logic internally doesn't buffer instructions. So there is - // never work to do at the beginning of every cycle. - bool hasWorkToComplete() const override { return false; } - Error cycleStart() override; - Error execute(InstRef &IR) override; - -#ifndef NDEBUG - void dump() const; -#endif -}; -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/EntryStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/EntryStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/EntryStage.h @@ -1,52 +0,0 @@ -//===---------------------- EntryStage.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the Entry stage of an instruction pipeline. 
Its sole -/// purpose in life is to pick instructions in sequence and move them to the -/// next pipeline stage. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_ENTRY_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_ENTRY_STAGE_H - -#include "SourceMgr.h" -#include "Stages/Stage.h" -#include "llvm/ADT/SmallVector.h" - -namespace llvm { -namespace mca { - -class EntryStage final : public Stage { - InstRef CurrentInstruction; - SmallVector, 16> Instructions; - SourceMgr &SM; - unsigned NumRetired; - - // Updates the program counter, and sets 'CurrentInstruction'. - void getNextInstruction(); - - EntryStage(const EntryStage &Other) = delete; - EntryStage &operator=(const EntryStage &Other) = delete; - -public: - EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { } - - bool isAvailable(const InstRef &IR) const override; - bool hasWorkToComplete() const override; - Error execute(InstRef &IR) override; - Error cycleStart() override; - Error cycleEnd() override; -}; - -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h @@ -1,80 +0,0 @@ -//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the execution stage of a default instruction pipeline. -/// -/// The ExecuteStage is responsible for managing the hardware scheduler -/// and issuing notifications that an instruction has been executed. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H - -#include "HardwareUnits/Scheduler.h" -#include "Instruction.h" -#include "Stages/Stage.h" -#include "llvm/ADT/ArrayRef.h" - -namespace llvm { -namespace mca { - -class ExecuteStage final : public Stage { - Scheduler &HWS; - - Error issueInstruction(InstRef &IR); - - // Called at the beginning of each cycle to issue already dispatched - // instructions to the underlying pipelines. - Error issueReadyInstructions(); - - // Used to notify instructions eliminated at register renaming stage. - Error handleInstructionEliminated(InstRef &IR); - - ExecuteStage(const ExecuteStage &Other) = delete; - ExecuteStage &operator=(const ExecuteStage &Other) = delete; - -public: - ExecuteStage(Scheduler &S) : Stage(), HWS(S) {} - - // This stage works under the assumption that the Pipeline will eventually - // execute a retire stage. We don't need to check if pipelines and/or - // schedulers have instructions to process, because those instructions are - // also tracked by the retire control unit. That means, - // RetireControlUnit::hasWorkToComplete() is responsible for checking if there - // are still instructions in-flight in the out-of-order backend. - bool hasWorkToComplete() const override { return false; } - bool isAvailable(const InstRef &IR) const override; - - // Notifies the scheduler that a new cycle just started. - // - // This method notifies the scheduler that a new cycle started. - // This method is also responsible for notifying listeners about instructions - // state changes, and processor resources freed by the scheduler. - // Instructions that transitioned to the 'Executed' state are automatically - // moved to the next stage (i.e. RetireStage). 
- Error cycleStart() override; - Error execute(InstRef &IR) override; - - void notifyInstructionIssued( - const InstRef &IR, - ArrayRef> Used) const; - void notifyInstructionExecuted(const InstRef &IR) const; - void notifyInstructionReady(const InstRef &IR) const; - void notifyResourceAvailable(const ResourceRef &RR) const; - - // Notify listeners that buffered resources have been consumed or freed. - void notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const; -}; - -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h +++ llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h @@ -1,45 +0,0 @@ -//===--------------------- InstructionTables.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements a custom stage to generate instruction tables. 
-/// See the description of command-line flag -instruction-tables in -/// docs/CommandGuide/lvm-mca.rst -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H -#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H - -#include "HardwareUnits/Scheduler.h" -#include "Stages/Stage.h" -#include "Support.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace llvm { -namespace mca { - -class InstructionTables final : public Stage { - const MCSchedModel &SM; - SmallVector, 4> UsedResources; - SmallVector Masks; - -public: - InstructionTables(const MCSchedModel &Model) : Stage(), SM(Model) { - computeProcResourceMasks(Model, Masks); - } - - bool hasWorkToComplete() const override { return false; } - Error execute(InstRef &IR) override; -}; -} // namespace mca -} // namespace llvm - -#endif Index: llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h @@ -1,48 +0,0 @@ -//===---------------------- RetireStage.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the retire stage of a default instruction pipeline. -/// The RetireStage represents the process logic that interacts with the -/// simulated RetireControlUnit hardware. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H - -#include "HardwareUnits/RegisterFile.h" -#include "HardwareUnits/RetireControlUnit.h" -#include "Stages/Stage.h" - -namespace llvm { -namespace mca { - -class RetireStage final : public Stage { - // Owner will go away when we move listeners/eventing to the stages. - RetireControlUnit &RCU; - RegisterFile &PRF; - - RetireStage(const RetireStage &Other) = delete; - RetireStage &operator=(const RetireStage &Other) = delete; - -public: - RetireStage(RetireControlUnit &R, RegisterFile &F) - : Stage(), RCU(R), PRF(F) {} - - bool hasWorkToComplete() const override { return !RCU.isEmpty(); } - Error cycleStart() override; - Error execute(InstRef &IR) override; - void notifyInstructionRetired(const InstRef &IR) const; -}; - -} // namespace mca -} // namespace llvm - -#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/Stage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/Stage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/Stage.h @@ -1,88 +0,0 @@ -//===---------------------- Stage.h -----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a stage. -/// A chain of stages compose an instruction pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_STAGE_H - -#include "HWEventListener.h" -#include "llvm/Support/Error.h" -#include - -namespace llvm { -namespace mca { - -class InstRef; - -class Stage { - Stage *NextInSequence; - std::set Listeners; - - Stage(const Stage &Other) = delete; - Stage &operator=(const Stage &Other) = delete; - -protected: - const std::set &getListeners() const { return Listeners; } - -public: - Stage() : NextInSequence(nullptr) {} - virtual ~Stage(); - - /// Returns true if it can execute IR during this cycle. - virtual bool isAvailable(const InstRef &IR) const { return true; } - - /// Returns true if some instructions are still executing this stage. - virtual bool hasWorkToComplete() const = 0; - - /// Called once at the start of each cycle. This can be used as a setup - /// phase to prepare for the executions during the cycle. - virtual Error cycleStart() { return ErrorSuccess(); } - - /// Called once at the end of each cycle. - virtual Error cycleEnd() { return ErrorSuccess(); } - - /// The primary action that this stage performs on instruction IR. - virtual Error execute(InstRef &IR) = 0; - - void setNextInSequence(Stage *NextStage) { - assert(!NextInSequence && "This stage already has a NextInSequence!"); - NextInSequence = NextStage; - } - - bool checkNextStage(const InstRef &IR) const { - return NextInSequence && NextInSequence->isAvailable(IR); - } - - /// Called when an instruction is ready to move the next pipeline stage. - /// - /// Stages are responsible for moving instructions to their immediate - /// successor stages. - Error moveToTheNextStage(InstRef &IR) { - assert(checkNextStage(IR) && "Next stage is not ready!"); - return NextInSequence->execute(IR); - } - - /// Add a listener to receive callbacks during the execution of this stage. 
- void addListener(HWEventListener *Listener); - - /// Notify listeners of a particular hardware event. - template void notifyEvent(const EventT &Event) const { - for (HWEventListener *Listener : Listeners) - Listener->onEvent(Event); - } -}; - -} // namespace mca -} // namespace llvm -#endif // LLVM_TOOLS_LLVM_MCA_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Support.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Support.h +++ llvm/trunk/tools/llvm-mca/include/Support.h @@ -1,119 +0,0 @@ -//===--------------------- Support.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Helper functions used by various pipeline components. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H -#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace mca { - -template -class InstructionError : public ErrorInfo> { -public: - static char ID; - std::string Message; - const T &Inst; - - InstructionError(std::string M, const T &MCI) - : Message(std::move(M)), Inst(MCI) {} - - void log(raw_ostream &OS) const override { OS << Message; } - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } -}; - -template char InstructionError::ID; - -/// This class represents the number of cycles per resource (fractions of -/// cycles). That quantity is managed here as a ratio, and accessed via the -/// double cast-operator below. The two quantities, number of cycles and -/// number of resources, are kept separate. 
This is used by the -/// ResourcePressureView to calculate the average resource cycles -/// per instruction/iteration. -class ResourceCycles { - unsigned Numerator, Denominator; - -public: - ResourceCycles() : Numerator(0), Denominator(1) {} - ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1) - : Numerator(Cycles), Denominator(ResourceUnits) {} - - operator double() const { - assert(Denominator && "Invalid denominator (must be non-zero)."); - return (Denominator == 1) ? Numerator : (double)Numerator / Denominator; - } - - // Add the components of RHS to this instance. Instead of calculating - // the final value here, we keep track of the numerator and denominator - // separately, to reduce floating point error. - ResourceCycles &operator+=(const ResourceCycles &RHS) { - if (Denominator == RHS.Denominator) - Numerator += RHS.Numerator; - else { - // Create a common denominator for LHS and RHS by calculating the least - // common multiple from the GCD. - unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator); - unsigned LCM = (Denominator * RHS.Denominator) / GCD; - unsigned LHSNumerator = Numerator * (LCM / Denominator); - unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator); - Numerator = LHSNumerator + RHSNumerator; - Denominator = LCM; - } - return *this; - } -}; - -/// Populates vector Masks with processor resource masks. -/// -/// The number of bits set in a mask depends on the processor resource type. -/// Each processor resource mask has at least one bit set. For groups, the -/// number of bits set in the mask is equal to the cardinality of the group plus -/// one. Excluding the most significant bit, the remaining bits in the mask -/// identify processor resources that are part of the group. 
-/// -/// Example: -/// -/// ResourceA -- Mask: 0b001 -/// ResourceB -- Mask: 0b010 -/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111 -/// -/// ResourceAB is a processor resource group containing ResourceA and ResourceB. -/// Each resource mask uniquely identifies a resource; both ResourceA and -/// ResourceB only have one bit set. -/// ResourceAB is a group; excluding the most significant bit in the mask, the -/// remaining bits identify the composition of the group. -/// -/// Resource masks are used by the ResourceManager to solve set membership -/// problems with simple bit manipulation operations. -void computeProcResourceMasks(const MCSchedModel &SM, - SmallVectorImpl &Masks); - -/// Compute the reciprocal block throughput from a set of processor resource -/// cycles. The reciprocal block throughput is computed as the MAX between: -/// - NumMicroOps / DispatchWidth -/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource). -double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, - unsigned NumMicroOps, - ArrayRef ProcResourceUsage); -} // namespace mca -} // namespace llvm - -#endif Index: llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt +++ llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt @@ -1,32 +0,0 @@ -include_directories(${LLVM_MCA_SOURCE_DIR}/include) - -add_library(LLVMMCA - STATIC - Context.cpp - HWEventListener.cpp - HardwareUnits/HardwareUnit.cpp - HardwareUnits/LSUnit.cpp - HardwareUnits/RegisterFile.cpp - HardwareUnits/ResourceManager.cpp - HardwareUnits/RetireControlUnit.cpp - HardwareUnits/Scheduler.cpp - InstrBuilder.cpp - Instruction.cpp - Pipeline.cpp - Stages/DispatchStage.cpp - Stages/EntryStage.cpp - Stages/ExecuteStage.cpp - Stages/InstructionTables.cpp - Stages/RetireStage.cpp - Stages/Stage.cpp - Support.cpp - ) - -llvm_update_compile_flags(LLVMMCA) 
-llvm_map_components_to_libnames(libs - MC - Support - ) - -target_link_libraries(LLVMMCA ${libs}) -set_target_properties(LLVMMCA PROPERTIES FOLDER "Libraries") Index: llvm/trunk/tools/llvm-mca/lib/Context.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Context.cpp +++ llvm/trunk/tools/llvm-mca/lib/Context.cpp @@ -1,65 +0,0 @@ -//===---------------------------- Context.cpp -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a class for holding ownership of various simulated -/// hardware units. A Context also provides a utility routine for constructing -/// a default out-of-order pipeline with fetch, dispatch, execute, and retire -/// stages. -/// -//===----------------------------------------------------------------------===// - -#include "Context.h" -#include "HardwareUnits/RegisterFile.h" -#include "HardwareUnits/RetireControlUnit.h" -#include "HardwareUnits/Scheduler.h" -#include "Stages/DispatchStage.h" -#include "Stages/EntryStage.h" -#include "Stages/ExecuteStage.h" -#include "Stages/RetireStage.h" - -namespace llvm { -namespace mca { - -std::unique_ptr -Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, - SourceMgr &SrcMgr) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Create the hardware units defining the backend. - auto RCU = llvm::make_unique(SM); - auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); - auto LSU = llvm::make_unique(SM, Opts.LoadQueueSize, - Opts.StoreQueueSize, Opts.AssumeNoAlias); - auto HWS = llvm::make_unique(SM, *LSU); - - // Create the pipeline stages. 
- auto Fetch = llvm::make_unique(SrcMgr); - auto Dispatch = llvm::make_unique(STI, MRI, Opts.DispatchWidth, - *RCU, *PRF); - auto Execute = llvm::make_unique(*HWS); - auto Retire = llvm::make_unique(*RCU, *PRF); - - // Pass the ownership of all the hardware units to this Context. - addHardwareUnit(std::move(RCU)); - addHardwareUnit(std::move(PRF)); - addHardwareUnit(std::move(LSU)); - addHardwareUnit(std::move(HWS)); - - // Build the pipeline. - auto StagePipeline = llvm::make_unique(); - StagePipeline->appendStage(std::move(Fetch)); - StagePipeline->appendStage(std::move(Dispatch)); - StagePipeline->appendStage(std::move(Execute)); - StagePipeline->appendStage(std::move(Retire)); - return StagePipeline; -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp +++ llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp @@ -1,23 +0,0 @@ -//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a vtable anchor for class HWEventListener. -/// -//===----------------------------------------------------------------------===// - -#include "HWEventListener.h" - -namespace llvm { -namespace mca { - -// Anchor the vtable here. 
-void HWEventListener::anchor() {} -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp @@ -1,25 +0,0 @@ -//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the anchor for the base class that describes -/// simulated hardware units. -/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/HardwareUnit.h" - -namespace llvm { -namespace mca { - -// Pin the vtable with this method. -HardwareUnit::~HardwareUnit() = default; - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp @@ -1,190 +0,0 @@ -//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A Load-Store Unit for the llvm-mca tool. 
-/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/LSUnit.h" -#include "Instruction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, - bool AssumeNoAlias) - : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) { - if (SM.hasExtraProcessorInfo()) { - const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); - if (!LQ_Size && EPI.LoadQueueID) { - const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); - LQ_Size = LdQDesc.BufferSize; - } - - if (!SQ_Size && EPI.StoreQueueID) { - const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); - SQ_Size = StQDesc.BufferSize; - } - } -} - -#ifndef NDEBUG -void LSUnit::dump() const { - dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; - dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; - dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n'; - dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; -} -#endif - -void LSUnit::assignLQSlot(unsigned Index) { - assert(!isLQFull()); - assert(LoadQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot \n"); - LoadQueue.insert(Index); -} - -void LSUnit::assignSQSlot(unsigned Index) { - assert(!isSQFull()); - assert(StoreQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot \n"); - StoreQueue.insert(Index); -} - -void LSUnit::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned IsMemBarrier = Desc.HasSideEffects; - assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); - - const unsigned Index = IR.getSourceIndex(); - if (Desc.MayLoad) { - if (IsMemBarrier) - LoadBarriers.insert(Index); - assignLQSlot(Index); - } - - if (Desc.MayStore) { - if (IsMemBarrier) - StoreBarriers.insert(Index); - 
assignSQSlot(Index); - } -} - -LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.MayLoad && isLQFull()) - return LSUnit::LSU_LQUEUE_FULL; - if (Desc.MayStore && isSQFull()) - return LSUnit::LSU_SQUEUE_FULL; - return LSUnit::LSU_AVAILABLE; -} - -bool LSUnit::isReady(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - const unsigned Index = IR.getSourceIndex(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Not a memory operation!"); - assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!"); - assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!"); - - if (IsALoad && !LoadBarriers.empty()) { - unsigned LoadBarrierIndex = *LoadBarriers.begin(); - // A younger load cannot pass a older load barrier. - if (Index > LoadBarrierIndex) - return false; - // A load barrier cannot pass a older load. - if (Index == LoadBarrierIndex && Index != *LoadQueue.begin()) - return false; - } - - if (IsAStore && !StoreBarriers.empty()) { - unsigned StoreBarrierIndex = *StoreBarriers.begin(); - // A younger store cannot pass a older store barrier. - if (Index > StoreBarrierIndex) - return false; - // A store barrier cannot pass a older store. - if (Index == StoreBarrierIndex && Index != *StoreQueue.begin()) - return false; - } - - // A load may not pass a previous store unless flag 'NoAlias' is set. - // A load may pass a previous load. - if (NoAlias && IsALoad) - return true; - - if (StoreQueue.size()) { - // A load may not pass a previous store. - // A store may not pass a previous store. - if (Index > *StoreQueue.begin()) - return false; - } - - // Okay, we are older than the oldest store in the queue. - // If there are no pending loads, then we can say for sure that this - // instruction is ready. - if (isLQEmpty()) - return true; - - // Check if there are no older loads. 
- if (Index <= *LoadQueue.begin()) - return true; - - // There is at least one younger load. - // - // A store may not pass a previous load. - // A load may pass a previous load. - return !IsAStore; -} - -void LSUnit::onInstructionExecuted(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - const unsigned Index = IR.getSourceIndex(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - - if (IsALoad) { - if (LoadQueue.erase(Index)) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the load queue.\n"); - } - if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { - LLVM_DEBUG( - dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of load barriers.\n"); - LoadBarriers.erase(Index); - } - } - - if (IsAStore) { - if (StoreQueue.erase(Index)) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the store queue.\n"); - } - - if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { - LLVM_DEBUG( - dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of store barriers.\n"); - StoreBarriers.erase(Index); - } - } -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp @@ -1,491 +0,0 @@ -//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a register mapping file class. 
This class is responsible -/// for managing hardware register files and the tracking of data dependencies -/// between registers. -/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/RegisterFile.h" -#include "Instruction.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri, - unsigned NumRegs) - : MRI(mri), - RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}), - ZeroRegisters(mri.getNumRegs(), false) { - initialize(SM, NumRegs); -} - -void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { - // Create a default register file that "sees" all the machine registers - // declared by the target. The number of physical registers in the default - // register file is set equal to `NumRegs`. A value of zero for `NumRegs` - // means: this register file has an unbounded number of physical registers. - RegisterFiles.emplace_back(NumRegs); - if (!SM.hasExtraProcessorInfo()) - return; - - // For each user defined register file, allocate a RegisterMappingTracker - // object. The size of every register file, as well as the mapping between - // register files and register classes is specified via tablegen. - const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); - - // Skip invalid register file at index 0. - for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) { - const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; - assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!"); - - // The cost of a register definition is equivalent to the number of - // physical registers that are allocated at register renaming stage. 
- unsigned Length = RF.NumRegisterCostEntries; - const MCRegisterCostEntry *FirstElt = - &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; - addRegisterFile(RF, ArrayRef(FirstElt, Length)); - } -} - -void RegisterFile::cycleStart() { - for (RegisterMappingTracker &RMT : RegisterFiles) - RMT.NumMoveEliminated = 0; -} - -void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF, - ArrayRef Entries) { - // A default register file is always allocated at index #0. That register file - // is mainly used to count the total number of mappings created by all - // register files at runtime. Users can limit the number of available physical - // registers in register file #0 through the command line flag - // `-register-file-size`. - unsigned RegisterFileIndex = RegisterFiles.size(); - RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle, - RF.AllowZeroMoveEliminationOnly); - - // Special case where there is no register class identifier in the set. - // An empty set of register classes means: this register file contains all - // the physical registers specified by the target. - // We optimistically assume that a register can be renamed at the cost of a - // single physical register. The constructor of RegisterFile ensures that - // a RegisterMapping exists for each logical register defined by the Target. - if (Entries.empty()) - return; - - // Now update the cost of individual registers. - for (const MCRegisterCostEntry &RCE : Entries) { - const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); - for (const MCPhysReg Reg : RC) { - RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; - IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; - if (IPC.first && IPC.first != RegisterFileIndex) { - // The only register file that is allowed to overlap is the default - // register file at index #0. The analysis is inaccurate if register - // files overlap. 
- errs() << "warning: register " << MRI.getName(Reg) - << " defined in multiple register files."; - } - IPC = std::make_pair(RegisterFileIndex, RCE.Cost); - Entry.RenameAs = Reg; - Entry.AllowMoveElimination = RCE.AllowMoveElimination; - - // Assume the same cost for each sub-register. - for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { - RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; - if (!OtherEntry.IndexPlusCost.first && - (!OtherEntry.RenameAs || - MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { - OtherEntry.IndexPlusCost = IPC; - OtherEntry.RenameAs = Reg; - } - } - } - } -} - -void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef UsedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs += Cost; - UsedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. - RegisterFiles[0].NumUsedPhysRegs += Cost; - UsedPhysRegs[0] += Cost; -} - -void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef FreedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs -= Cost; - FreedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. 
- RegisterFiles[0].NumUsedPhysRegs -= Cost; - FreedPhysRegs[0] += Cost; -} - -void RegisterFile::addRegisterWrite(WriteRef Write, - MutableArrayRef UsedPhysRegs) { - WriteState &WS = *Write.getWriteState(); - unsigned RegID = WS.getRegisterID(); - assert(RegID && "Adding an invalid register definition?"); - - LLVM_DEBUG({ - dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() - << ", " << MRI.getName(RegID) << "]\n"; - }); - - // If RenameAs is equal to RegID, then RegID is subject to register renaming - // and false dependencies on RegID are all eliminated. - - // If RenameAs references the invalid register, then we optimistically assume - // that it can be renamed. In the absence of tablegen descriptors for register - // files, RenameAs is always set to the invalid register ID. In all other - // cases, RenameAs must be either equal to RegID, or it must reference a - // super-register of RegID. - - // If RenameAs is a super-register of RegID, then a write to RegID has always - // a false dependency on RenameAs. The only exception is for when the write - // implicitly clears the upper portion of the underlying register. - // If a write clears its super-registers, then it is renamed as `RenameAs`. - bool IsWriteZero = WS.isWriteZero(); - bool IsEliminated = WS.isEliminated(); - bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated; - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - WS.setPRF(RRI.IndexPlusCost.first); - - if (RRI.RenameAs && RRI.RenameAs != RegID) { - RegID = RRI.RenameAs; - WriteRef &OtherWrite = RegisterMappings[RegID].first; - - if (!WS.clearsSuperRegisters()) { - // The processor keeps the definition of `RegID` together with register - // `RenameAs`. Since this partial write is not renamed, no physical - // register is allocated. 
- ShouldAllocatePhysRegs = false; - - WriteState *OtherWS = OtherWrite.getWriteState(); - if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { - // This partial write has a false dependency on RenameAs. - assert(!IsEliminated && "Unexpected partial update!"); - OtherWS->addUser(&WS); - } - } - } - - // Update zero registers. - unsigned ZeroRegisterID = - WS.clearsSuperRegisters() ? RegID : WS.getRegisterID(); - if (IsWriteZero) { - ZeroRegisters.setBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.setBit(*I); - } else { - ZeroRegisters.clearBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.clearBit(*I); - } - - // If this is move has been eliminated, then the call to tryEliminateMove - // should have already updated all the register mappings. - if (!IsEliminated) { - // Update the mapping for register RegID including its sub-registers. - RegisterMappings[RegID].first = Write; - RegisterMappings[RegID].second.AliasRegID = 0U; - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - RegisterMappings[*I].first = Write; - RegisterMappings[*I].second.AliasRegID = 0U; - } - - // No physical registers are allocated for instructions that are optimized - // in hardware. For example, zero-latency data-dependency breaking - // instructions don't consume physical registers. - if (ShouldAllocatePhysRegs) - allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); - } - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { - if (!IsEliminated) { - RegisterMappings[*I].first = Write; - RegisterMappings[*I].second.AliasRegID = 0U; - } - - if (IsWriteZero) - ZeroRegisters.setBit(*I); - else - ZeroRegisters.clearBit(*I); - } -} - -void RegisterFile::removeRegisterWrite( - const WriteState &WS, MutableArrayRef FreedPhysRegs) { - // Early exit if this write was eliminated. 
A write eliminated at register - // renaming stage generates an alias, and it is not added to the PRF. - if (WS.isEliminated()) - return; - - unsigned RegID = WS.getRegisterID(); - - assert(RegID != 0 && "Invalidating an already invalid register?"); - assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && - "Invalidating a write of unknown cycles!"); - assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); - - bool ShouldFreePhysRegs = !WS.isWriteZero(); - unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; - if (RenameAs && RenameAs != RegID) { - RegID = RenameAs; - - if (!WS.clearsSuperRegisters()) { - // Keep the definition of `RegID` together with register `RenameAs`. - ShouldFreePhysRegs = false; - } - } - - if (ShouldFreePhysRegs) - freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); - - WriteRef &WR = RegisterMappings[RegID].first; - if (WR.getWriteState() == &WS) - WR.invalidate(); - - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } -} - -bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { - const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()]; - const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()]; - - // From and To must be owned by the same PRF. - const RegisterRenamingInfo &RRIFrom = RMFrom.second; - const RegisterRenamingInfo &RRITo = RMTo.second; - unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first; - if (RegisterFileIndex != RRITo.IndexPlusCost.first) - return false; - - // We only allow move elimination for writes that update a full physical - // register. 
On X86, move elimination is possible with 32-bit general purpose - // registers because writes to those registers are not partial writes. If a - // register move is a partial write, then we conservatively assume that move - // elimination fails, since it would either trigger a partial update, or the - // issue of a merge opcode. - // - // Note that this constraint may be lifted in future. For example, we could - // make this model more flexible, and let users customize the set of registers - // (i.e. register classes) that allow move elimination. - // - // For now, we assume that there is a strong correlation between registers - // that allow move elimination, and how those same registers are renamed in - // hardware. - if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) { - // Early exit if the PRF doesn't support move elimination for this register. - if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination) - return false; - if (!WS.clearsSuperRegisters()) - return false; - } - - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - if (RMT.MaxMoveEliminatedPerCycle && - RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle) - return false; - - bool IsZeroMove = ZeroRegisters[RS.getRegisterID()]; - if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove) - return false; - - MCPhysReg FromReg = RS.getRegisterID(); - MCPhysReg ToReg = WS.getRegisterID(); - - // Construct an alias. 
- MCPhysReg AliasReg = FromReg; - if (RRIFrom.RenameAs) - AliasReg = RRIFrom.RenameAs; - - const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second; - if (RMAlias.AliasRegID) - AliasReg = RMAlias.AliasRegID; - - if (AliasReg != ToReg) { - RegisterMappings[ToReg].second.AliasRegID = AliasReg; - for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I) - RegisterMappings[*I].second.AliasRegID = AliasReg; - } - - RMT.NumMoveEliminated++; - if (IsZeroMove) { - WS.setWriteZero(); - RS.setReadZero(); - } - WS.setEliminated(); - - return true; -} - -void RegisterFile::collectWrites(const ReadState &RS, - SmallVectorImpl &Writes) const { - unsigned RegID = RS.getRegisterID(); - assert(RegID && RegID < RegisterMappings.size()); - LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " - << MRI.getName(RegID) << '\n'); - - // Check if this is an alias. - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - if (RRI.AliasRegID) - RegID = RRI.AliasRegID; - - const WriteRef &WR = RegisterMappings[RegID].first; - if (WR.isValid()) - Writes.push_back(WR); - - // Handle potential partial register updates. - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - const WriteRef &WR = RegisterMappings[*I].first; - if (WR.isValid()) - Writes.push_back(WR); - } - - // Remove duplicate entries and resize the input vector. 
- if (Writes.size() > 1) { - sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { - return Lhs.getWriteState() < Rhs.getWriteState(); - }); - auto It = std::unique(Writes.begin(), Writes.end()); - Writes.resize(std::distance(Writes.begin(), It)); - } - - LLVM_DEBUG({ - for (const WriteRef &WR : Writes) { - const WriteState &WS = *WR.getWriteState(); - dbgs() << "[PRF] Found a dependent use of Register " - << MRI.getName(WS.getRegisterID()) << " (defined by instruction #" - << WR.getSourceIndex() << ")\n"; - } - }); -} - -void RegisterFile::addRegisterRead(ReadState &RS, - SmallVectorImpl &Defs) const { - unsigned RegID = RS.getRegisterID(); - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - RS.setPRF(RRI.IndexPlusCost.first); - if (RS.isIndependentFromDef()) - return; - - if (ZeroRegisters[RS.getRegisterID()]) - RS.setReadZero(); - collectWrites(RS, Defs); - RS.setDependentWrites(Defs.size()); -} - -unsigned RegisterFile::isAvailable(ArrayRef Regs) const { - SmallVector NumPhysRegs(getNumRegisterFiles()); - - // Find how many new mappings must be created for each register file. - for (const unsigned RegID : Regs) { - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; - if (Entry.first) - NumPhysRegs[Entry.first] += Entry.second; - NumPhysRegs[0] += Entry.second; - } - - unsigned Response = 0; - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - unsigned NumRegs = NumPhysRegs[I]; - if (!NumRegs) - continue; - - const RegisterMappingTracker &RMT = RegisterFiles[I]; - if (!RMT.NumPhysRegs) { - // The register file has an unbounded number of microarchitectural - // registers. - continue; - } - - if (RMT.NumPhysRegs < NumRegs) { - // The current register file is too small. This may occur if the number of - // microarchitectural registers in register file #0 was changed by the - // users via flag -reg-file-size. 
Alternatively, the scheduling model - // specified a too small number of registers for this register file. - LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); - - // FIXME: Normalize the instruction register count to match the - // NumPhysRegs value. This is a highly unusual case, and is not expected - // to occur. This normalization is hiding an inconsistency in either the - // scheduling model or in the value that the user might have specified - // for NumPhysRegs. - NumRegs = RMT.NumPhysRegs; - } - - if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) - Response |= (1U << I); - } - - return Response; -} - -#ifndef NDEBUG -void RegisterFile::dump() const { - for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { - const RegisterMapping &RM = RegisterMappings[I]; - const RegisterRenamingInfo &RRI = RM.second; - if (ZeroRegisters[I]) { - dbgs() << MRI.getName(I) << ", " << I - << ", PRF=" << RRI.IndexPlusCost.first - << ", Cost=" << RRI.IndexPlusCost.second - << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I] - << ","; - RM.first.dump(); - dbgs() << '\n'; - } - } - - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - dbgs() << "Register File #" << I; - const RegisterMappingTracker &RMT = RegisterFiles[I]; - dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs - << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; - } -} -#endif - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp @@ -1,326 +0,0 @@ -//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// The classes here represent processor resource units and their management -/// strategy. These classes are managed by the Scheduler. -/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/ResourceManager.h" -#include "Support.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace mca { - -#define DEBUG_TYPE "llvm-mca" -ResourceStrategy::~ResourceStrategy() = default; - -uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { - // This method assumes that ReadyMask cannot be zero. - uint64_t CandidateMask = ReadyMask & NextInSequenceMask; - if (CandidateMask) { - CandidateMask = PowerOf2Floor(CandidateMask); - NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); - return CandidateMask; - } - - NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; - RemovedFromNextInSequence = 0; - CandidateMask = ReadyMask & NextInSequenceMask; - - if (CandidateMask) { - CandidateMask = PowerOf2Floor(CandidateMask); - NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); - return CandidateMask; - } - - NextInSequenceMask = ResourceUnitMask; - CandidateMask = PowerOf2Floor(ReadyMask & NextInSequenceMask); - NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); - return CandidateMask; -} - -void DefaultResourceStrategy::used(uint64_t Mask) { - if (Mask > NextInSequenceMask) { - RemovedFromNextInSequence |= Mask; - return; - } - - NextInSequenceMask &= (~Mask); - if (NextInSequenceMask) - return; - - NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; - RemovedFromNextInSequence = 0; -} - -ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, - uint64_t Mask) - : ProcResourceDescIndex(Index), ResourceMask(Mask), - BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask)>1) { - if 
(IsAGroup) - ResourceSizeMask = ResourceMask ^ PowerOf2Floor(ResourceMask); - else - ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; - ReadyMask = ResourceSizeMask; - AvailableSlots = BufferSize == -1 ? 0U : static_cast(BufferSize); - Unavailable = false; -} - -bool ResourceState::isReady(unsigned NumUnits) const { - return (!isReserved() || isADispatchHazard()) && - countPopulation(ReadyMask) >= NumUnits; -} - -ResourceStateEvent ResourceState::isBufferAvailable() const { - if (isADispatchHazard() && isReserved()) - return RS_RESERVED; - if (!isBuffered() || AvailableSlots) - return RS_BUFFER_AVAILABLE; - return RS_BUFFER_UNAVAILABLE; -} - -#ifndef NDEBUG -void ResourceState::dump() const { - dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask - << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize - << ", AvailableSlots=" << AvailableSlots - << ", Reserved=" << Unavailable << '\n'; -} -#endif - -static unsigned getResourceStateIndex(uint64_t Mask) { - return std::numeric_limits::digits - countLeadingZeros(Mask); -} - -static std::unique_ptr -getStrategyFor(const ResourceState &RS) { - if (RS.isAResourceGroup() || RS.getNumUnits() > 1) - return llvm::make_unique(RS.getReadyMask()); - return std::unique_ptr(nullptr); -} - -ResourceManager::ResourceManager(const MCSchedModel &SM) { - computeProcResourceMasks(SM, ProcResID2Mask); - Resources.resize(SM.getNumProcResourceKinds()); - Strategies.resize(SM.getNumProcResourceKinds()); - - for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - uint64_t Mask = ProcResID2Mask[I]; - unsigned Index = getResourceStateIndex(Mask); - Resources[Index] = - llvm::make_unique(*SM.getProcResource(I), I, Mask); - Strategies[Index] = getStrategyFor(*Resources[Index]); - } -} - -void ResourceManager::setCustomStrategyImpl(std::unique_ptr S, - uint64_t ResourceMask) { - unsigned Index = getResourceStateIndex(ResourceMask); - assert(Index < Resources.size() && "Invalid processor resource 
index!"); - assert(S && "Unexpected null strategy in input!"); - Strategies[Index] = std::move(S); -} - -unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const { - return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); -} - -unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { - return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); -} - -// Returns the actual resource consumed by this Use. -// First, is the primary resource ID. -// Second, is the specific sub-resource ID. -ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { - unsigned Index = getResourceStateIndex(ResourceID); - ResourceState &RS = *Resources[Index]; - assert(RS.isReady() && "No available units to select!"); - - // Special case where RS is not a group, and it only declares a single - // resource unit. - if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) - return std::make_pair(ResourceID, RS.getReadyMask()); - - uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); - if (RS.isAResourceGroup()) - return selectPipe(SubResourceID); - return std::make_pair(ResourceID, SubResourceID); -} - -void ResourceManager::use(const ResourceRef &RR) { - // Mark the sub-resource referenced by RR as used. - unsigned RSID = getResourceStateIndex(RR.first); - ResourceState &RS = *Resources[RSID]; - RS.markSubResourceAsUsed(RR.second); - // Remember to update the resource strategy for non-group resources with - // multiple units. - if (RS.getNumUnits() > 1) - Strategies[RSID]->used(RR.second); - - // If there are still available units in RR.first, - // then we are done. - if (RS.isReady()) - return; - - // Notify to other resources that RR.first is no longer available. 
- for (std::unique_ptr &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) { - unsigned Index = getResourceStateIndex(Current.getResourceMask()); - Current.markSubResourceAsUsed(RR.first); - Strategies[Index]->used(RR.first); - } - } -} - -void ResourceManager::release(const ResourceRef &RR) { - ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; - bool WasFullyUsed = !RS.isReady(); - RS.releaseSubResource(RR.second); - if (!WasFullyUsed) - return; - - for (std::unique_ptr &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) - Current.releaseSubResource(RR.first); - } -} - -ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; -} - -void ResourceManager::reserveBuffers(ArrayRef Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - - if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); - } - } -} - -void ResourceManager::releaseBuffers(ArrayRef Buffers) { - for (const uint64_t R : Buffers) - Resources[getResourceStateIndex(R)]->releaseBuffer(); -} - -bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { - return all_of( - Desc.Resources, [&](const std::pair &E) { - unsigned NumUnits = E.second.isReserved() ? 
0U : E.second.NumUnits; - unsigned Index = getResourceStateIndex(E.first); - return Resources[Index]->isReady(NumUnits); - }); -} - -// Returns true if all resources are in-order, and there is at least one -// resource which is a dispatch hazard (BufferSize = 0). -bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { - if (!canBeIssued(Desc)) - return false; - bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { - unsigned Index = getResourceStateIndex(BufferMask); - const ResourceState &Resource = *Resources[Index]; - return Resource.isInOrder() || Resource.isADispatchHazard(); - }); - if (!AllInOrderResources) - return false; - - return any_of(Desc.Buffers, [&](uint64_t BufferMask) { - return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); - }); -} - -void ResourceManager::issueInstruction( - const InstrDesc &Desc, - SmallVectorImpl> &Pipes) { - for (const std::pair &R : Desc.Resources) { - const CycleSegment &CS = R.second.CS; - if (!CS.size()) { - releaseResource(R.first); - continue; - } - - assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); - if (!R.second.isReserved()) { - ResourceRef Pipe = selectPipe(R.first); - use(Pipe); - BusyResources[Pipe] += CS.size(); - // Replace the resource mask with a valid processor resource index. - const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; - Pipe.first = RS.getProcResourceID(); - Pipes.emplace_back(std::pair( - Pipe, ResourceCycles(CS.size()))); - } else { - assert((countPopulation(R.first) > 1) && "Expected a group!"); - // Mark this group as reserved. - assert(R.second.isReserved()); - reserveResource(R.first); - BusyResources[ResourceRef(R.first, R.first)] += CS.size(); - } - } -} - -void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { - for (std::pair &BR : BusyResources) { - if (BR.second) - BR.second--; - if (!BR.second) { - // Release this resource. 
- const ResourceRef &RR = BR.first; - - if (countPopulation(RR.first) == 1) - release(RR); - - releaseResource(RR.first); - ResourcesFreed.push_back(RR); - } - } - - for (const ResourceRef &RF : ResourcesFreed) - BusyResources.erase(RF); -} - -void ResourceManager::reserveResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - assert(!Resource.isReserved()); - Resource.setReserved(); -} - -void ResourceManager::releaseResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - Resource.clearReserved(); -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp @@ -1,88 +0,0 @@ -//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file simulates the hardware responsible for retiring instructions. -/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/RetireControlUnit.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) - : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { - // Check if the scheduling model provides extra information about the machine - // processor. 
If so, then use that information to set the reorder buffer size - // and the maximum number of instructions retired per cycle. - if (SM.hasExtraProcessorInfo()) { - const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); - if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; - MaxRetirePerCycle = EPI.MaxRetirePerCycle; - } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); -} - -// Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast(Queue.size())); - // Zero latency instructions may have zero uOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. - NormalizedQuantity = std::max(NormalizedQuantity, 1U); - unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; - NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; - return TokenID; -} - -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; -} - -void RetireControlUnit::consumeCurrentToken() { - RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR && "Invalid RUToken in the RCU queue."); - Current.IR.getInstruction()->retire(); - - // Update the slot index to be the next item in the circular queue. 
- CurrentInstructionSlotIdx += Current.NumSlots; - CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; -} - -void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { - assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); - Queue[TokenID].Executed = true; -} - -#ifndef NDEBUG -void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; -} -#endif - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp @@ -1,245 +0,0 @@ -//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// A scheduler for processor resource units and processor resource groups. -// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/Scheduler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace mca { - -#define DEBUG_TYPE "llvm-mca" - -void Scheduler::initializeStrategy(std::unique_ptr S) { - // Ensure we have a valid (non-null) strategy object. - Strategy = S ? std::move(S) : llvm::make_unique(); -} - -// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
-SchedulerStrategy::~SchedulerStrategy() = default; -DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; - -#ifndef NDEBUG -void Scheduler::dump() const { - dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; - dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; - dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; - Resources->dump(); -} -#endif - -Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - switch (Resources->canBeDispatched(Desc.Buffers)) { - case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: - return Scheduler::SC_BUFFERS_FULL; - case ResourceStateEvent::RS_RESERVED: - return Scheduler::SC_DISPATCH_GROUP_STALL; - case ResourceStateEvent::RS_BUFFER_AVAILABLE: - break; - } - - // Give lower priority to LSUnit stall events. - switch (LSU.isAvailable(IR)) { - case LSUnit::LSU_LQUEUE_FULL: - return Scheduler::SC_LOAD_QUEUE_FULL; - case LSUnit::LSU_SQUEUE_FULL: - return Scheduler::SC_STORE_QUEUE_FULL; - case LSUnit::LSU_AVAILABLE: - return Scheduler::SC_AVAILABLE; - } - - llvm_unreachable("Don't know how to process this LSU state result!"); -} - -void Scheduler::issueInstructionImpl( - InstRef &IR, - SmallVectorImpl> &UsedResources) { - Instruction *IS = IR.getInstruction(); - const InstrDesc &D = IS->getDesc(); - - // Issue the instruction and collect all the consumed resources - // into a vector. That vector is then used to notify the listener. - Resources->issueInstruction(D, UsedResources); - - // Notify the instruction that it started executing. - // This updates the internal state of each write. - IS->execute(); - - if (IS->isExecuting()) - IssuedSet.emplace_back(IR); - else if (IS->isExecuted()) - LSU.onInstructionExecuted(IR); -} - -// Release the buffered resources and issue the instruction. 
-void Scheduler::issueInstruction( - InstRef &IR, - SmallVectorImpl> &UsedResources, - SmallVectorImpl &ReadyInstructions) { - const Instruction &Inst = *IR.getInstruction(); - bool HasDependentUsers = Inst.hasDependentUsers(); - - Resources->releaseBuffers(Inst.getDesc().Buffers); - issueInstructionImpl(IR, UsedResources); - // Instructions that have been issued during this cycle might have unblocked - // other dependent instructions. Dependent instructions may be issued during - // this same cycle if operands have ReadAdvance entries. Promote those - // instructions to the ReadySet and notify the caller that those are ready. - if (HasDependentUsers) - promoteToReadySet(ReadyInstructions); -} - -void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { - // Scan the set of waiting instructions and promote them to the - // ready queue if operands are all ready. - unsigned RemovedElements = 0; - for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR) - break; - - // Check if this instruction is now ready. In case, force - // a transition in state using method 'update()'. - Instruction &IS = *IR.getInstruction(); - if (!IS.isReady()) - IS.update(); - - // Check if there are still unsolved data dependencies. - if (!isReady(IR)) { - ++I; - continue; - } - - Ready.emplace_back(IR); - ReadySet.emplace_back(IR); - - IR.invalidate(); - ++RemovedElements; - std::iter_swap(I, E - RemovedElements); - } - - WaitSet.resize(WaitSet.size() - RemovedElements); -} - -InstRef Scheduler::select() { - unsigned QueueIndex = ReadySet.size(); - for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { - const InstRef &IR = ReadySet[I]; - if (QueueIndex == ReadySet.size() || - Strategy->compare(IR, ReadySet[QueueIndex])) { - const InstrDesc &D = IR.getInstruction()->getDesc(); - if (Resources->canBeIssued(D)) - QueueIndex = I; - } - } - - if (QueueIndex == ReadySet.size()) - return InstRef(); - - // We found an instruction to issue. 
- InstRef IR = ReadySet[QueueIndex]; - std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); - ReadySet.pop_back(); - return IR; -} - -void Scheduler::updateIssuedSet(SmallVectorImpl &Executed) { - unsigned RemovedElements = 0; - for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR) - break; - Instruction &IS = *IR.getInstruction(); - if (!IS.isExecuted()) { - LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR - << " is still executing.\n"); - ++I; - continue; - } - - // Instruction IR has completed execution. - LSU.onInstructionExecuted(IR); - Executed.emplace_back(IR); - ++RemovedElements; - IR.invalidate(); - std::iter_swap(I, E - RemovedElements); - } - - IssuedSet.resize(IssuedSet.size() - RemovedElements); -} - -void Scheduler::cycleEvent(SmallVectorImpl &Freed, - SmallVectorImpl &Executed, - SmallVectorImpl &Ready) { - // Release consumed resources. - Resources->cycleEvent(Freed); - - // Propagate the cycle event to the 'Issued' and 'Wait' sets. - for (InstRef &IR : IssuedSet) - IR.getInstruction()->cycleEvent(); - - updateIssuedSet(Executed); - - for (InstRef &IR : WaitSet) - IR.getInstruction()->cycleEvent(); - - promoteToReadySet(Ready); -} - -bool Scheduler::mustIssueImmediately(const InstRef &IR) const { - // Instructions that use an in-order dispatch/issue processor resource must be - // issued immediately to the pipeline(s). Any other in-order buffered - // resources (i.e. BufferSize=1) is consumed. - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); -} - -void Scheduler::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - Resources->reserveBuffers(Desc.Buffers); - - // If necessary, reserve queue entries in the load-store unit (LSU). 
- bool IsMemOp = Desc.MayLoad || Desc.MayStore; - if (IsMemOp) - LSU.dispatch(IR); - - if (!isReady(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); - WaitSet.push_back(IR); - return; - } - - // Don't add a zero-latency instruction to the Ready queue. - // A zero-latency instruction doesn't consume any scheduler resources. That is - // because it doesn't need to be executed, and it is often removed at register - // renaming stage. For example, register-register moves are often optimized at - // register renaming stage by simply updating register aliases. On some - // targets, zero-idiom instructions (for example: a xor that clears the value - // of a register) are treated specially, and are often eliminated at register - // renaming stage. - if (!mustIssueImmediately(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); - ReadySet.push_back(IR); - } -} - -bool Scheduler::isReady(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsMemOp = Desc.MayLoad || Desc.MayStore; - return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR)); -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp +++ llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp @@ -1,675 +0,0 @@ -//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the InstrBuilder interface. 
-/// -//===----------------------------------------------------------------------===// - -#include "InstrBuilder.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, - const llvm::MCInstrInfo &mcii, - const llvm::MCRegisterInfo &mri, - const llvm::MCInstrAnalysis &mcia) - : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), - FirstReturnInst(true) { - computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); -} - -static void initializeUsedResources(InstrDesc &ID, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI, - ArrayRef ProcResourceMasks) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Populate resources consumed. - using ResourcePlusCycles = std::pair; - std::vector Worklist; - - // Track cycles contributed by resources that are in a "Super" relationship. - // This is required if we want to correctly match the behavior of method - // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set - // of "consumed" processor resources and resource cycles, the logic in - // ExpandProcResource() doesn't update the number of resource cycles - // contributed by a "Super" resource to a group. - // We need to take this into account when we find that a processor resource is - // part of a group, and it is also used as the "Super" of other resources. - // This map stores the number of cycles contributed by sub-resources that are - // part of a "Super" resource. The key value is the "Super" resource mask ID. 
- DenseMap SuperResources; - - unsigned NumProcResources = SM.getNumProcResourceKinds(); - APInt Buffers(NumProcResources, 0); - - for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { - const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; - const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); - uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; - if (PR.BufferSize != -1) - Buffers.setBit(PRE->ProcResourceIdx); - CycleSegment RCy(0, PRE->Cycles, false); - Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); - if (PR.SuperIdx) { - uint64_t Super = ProcResourceMasks[PR.SuperIdx]; - SuperResources[Super] += PRE->Cycles; - } - } - - // Sort elements by mask popcount, so that we prioritize resource units over - // resource groups, and smaller groups over larger groups. - sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { - unsigned popcntA = countPopulation(A.first); - unsigned popcntB = countPopulation(B.first); - if (popcntA < popcntB) - return true; - if (popcntA > popcntB) - return false; - return A.first < B.first; - }); - - uint64_t UsedResourceUnits = 0; - - // Remove cycles contributed by smaller resources. - for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { - ResourcePlusCycles &A = Worklist[I]; - if (!A.second.size()) { - A.second.NumUnits = 0; - A.second.setReserved(); - ID.Resources.emplace_back(A); - continue; - } - - ID.Resources.emplace_back(A); - uint64_t NormalizedMask = A.first; - if (countPopulation(A.first) == 1) { - UsedResourceUnits |= A.first; - } else { - // Remove the leading 1 from the resource group mask. 
- NormalizedMask ^= PowerOf2Floor(NormalizedMask); - } - - for (unsigned J = I + 1; J < E; ++J) { - ResourcePlusCycles &B = Worklist[J]; - if ((NormalizedMask & B.first) == NormalizedMask) { - B.second.CS.subtract(A.second.size() - SuperResources[A.first]); - if (countPopulation(B.first) > 1) - B.second.NumUnits++; - } - } - } - - // A SchedWrite may specify a number of cycles in which a resource group - // is reserved. For example (on target x86; cpu Haswell): - // - // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { - // let ResourceCycles = [2, 2, 3]; - // } - // - // This means: - // Resource units HWPort0 and HWPort1 are both used for 2cy. - // Resource group HWPort01 is the union of HWPort0 and HWPort1. - // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 - // will not be usable for 2 entire cycles from instruction issue. - // - // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency - // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an - // extra delay on top of the 2 cycles latency. - // During those extra cycles, HWPort01 is not usable by other instructions. - for (ResourcePlusCycles &RPC : ID.Resources) { - if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { - // Remove the leading 1 from the resource group mask. - uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); - if ((Mask & UsedResourceUnits) == Mask) - RPC.second.setReserved(); - } - } - - // Identify extra buffers that are consumed through super resources. - for (const std::pair &SR : SuperResources) { - for (unsigned I = 1, E = NumProcResources; I < E; ++I) { - const MCProcResourceDesc &PR = *SM.getProcResource(I); - if (PR.BufferSize == -1) - continue; - - uint64_t Mask = ProcResourceMasks[I]; - if (Mask != SR.first && ((Mask & SR.first) == SR.first)) - Buffers.setBit(I); - } - } - - // Now set the buffers. 
- if (unsigned NumBuffers = Buffers.countPopulation()) { - ID.Buffers.resize(NumBuffers); - for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { - if (Buffers[I]) { - --NumBuffers; - ID.Buffers[NumBuffers] = ProcResourceMasks[I]; - } - } - } - - LLVM_DEBUG({ - for (const std::pair &R : ID.Resources) - dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << R << '\n'; - }); -} - -static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI) { - if (MCDesc.isCall()) { - // We cannot estimate how long this call will take. - // Artificially set an arbitrarily high latency (100cy). - ID.MaxLatency = 100U; - return; - } - - int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); - // If latency is unknown, then conservatively assume a MaxLatency of 100cy. - ID.MaxLatency = Latency < 0 ? 100U : static_cast(Latency); -} - -static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { - // Count register definitions, and skip non register operands in the process. - unsigned I, E; - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { - const MCOperand &Op = MCI.getOperand(I); - if (Op.isReg()) - --NumExplicitDefs; - } - - if (NumExplicitDefs) { - return make_error>( - "Expected more register operand definitions.", MCI); - } - - if (MCDesc.hasOptionalDef()) { - // Always assume that the optional definition is the last operand. - const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); - if (I == MCI.getNumOperands() || !Op.isReg()) { - std::string Message = - "expected a register operand for an optional definition. 
Instruction " - "has not been correctly analyzed."; - return make_error>(Message, MCI); - } - } - - return ErrorSuccess(); -} - -void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - - // Assumptions made by this algorithm: - // 1. The number of explicit and implicit register definitions in a MCInst - // matches the number of explicit and implicit definitions according to - // the opcode descriptor (MCInstrDesc). - // 2. Uses start at index #(MCDesc.getNumDefs()). - // 3. There can only be a single optional register definition, an it is - // always the last operand of the sequence (excluding extra operands - // contributed by variadic opcodes). - // - // These assumptions work quite well for most out-of-order in-tree targets - // like x86. This is mainly because the vast majority of instructions is - // expanded to MCInst using a straightforward lowering logic that preserves - // the ordering of the operands. - // - // About assumption 1. - // The algorithm allows non-register operands between register operand - // definitions. This helps to handle some special ARM instructions with - // implicit operand increment (-mtriple=armv7): - // - // vld1.32 {d18, d19}, [r1]! @ - // @ (!!) - // @ - // @ - // @ - // @ > - // - // MCDesc reports: - // 6 explicit operands. - // 1 optional definition - // 2 explicit definitions (!!) - // - // The presence of an 'Imm' operand between the two register definitions - // breaks the assumption that "register definitions are always at the - // beginning of the operand sequence". - // - // To workaround this issue, this algorithm ignores (i.e. skips) any - // non-register operands between register definitions. The optional - // definition is still at index #(NumOperands-1). - // - // According to assumption 2. 
register reads start at #(NumExplicitDefs-1). - // That means, register R1 from the example is both read and written. - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); - unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; - unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; - if (MCDesc.hasOptionalDef()) - TotalDefs++; - - unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); - ID.Writes.resize(TotalDefs + NumVariadicOps); - // Iterate over the operands list, and skip non-register operands. - // The first NumExplictDefs register operands are expected to be register - // definitions. - unsigned CurrentDef = 0; - unsigned i = 0; - for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { - const MCOperand &Op = MCI.getOperand(i); - if (!Op.isReg()) - continue; - - WriteDescriptor &Write = ID.Writes[CurrentDef]; - Write.OpIndex = i; - if (CurrentDef < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. 
- Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - Write.IsOptionalDef = false; - LLVM_DEBUG({ - dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - CurrentDef++; - } - - assert(CurrentDef == NumExplicitDefs && - "Expected more register operand definitions."); - for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { - unsigned Index = NumExplicitDefs + CurrentDef; - WriteDescriptor &Write = ID.Writes[Index]; - Write.OpIndex = ~CurrentDef; - Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; - if (Index < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, Index); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - - Write.IsOptionalDef = false; - assert(Write.RegisterID != 0 && "Expected a valid phys register!"); - LLVM_DEBUG({ - dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex - << ", PhysReg=" << MRI.getName(Write.RegisterID) - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - if (MCDesc.hasOptionalDef()) { - WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; - Write.OpIndex = MCDesc.getNumOperands() - 1; - // Assign a default latency for this write. 
- Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - Write.IsOptionalDef = true; - LLVM_DEBUG({ - dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - if (!NumVariadicOps) - return; - - // FIXME: if an instruction opcode is flagged 'mayStore', and it has no - // "unmodeledSideEffects', then this logic optimistically assumes that any - // extra register operands in the variadic sequence is not a register - // definition. - // - // Otherwise, we conservatively assume that any register operand from the - // variadic sequence is both a register read and a register write. - bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() && - !MCDesc.hasUnmodeledSideEffects(); - CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); - for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); - I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { - const MCOperand &Op = MCI.getOperand(OpIndex); - if (!Op.isReg()) - continue; - - WriteDescriptor &Write = ID.Writes[CurrentDef]; - Write.OpIndex = OpIndex; - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - Write.IsOptionalDef = false; - ++CurrentDef; - LLVM_DEBUG({ - dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - ID.Writes.resize(CurrentDef); -} - -void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); - unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); - // Remove the optional definition. 
- if (MCDesc.hasOptionalDef()) - --NumExplicitUses; - unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); - unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; - ID.Reads.resize(TotalUses); - unsigned CurrentUse = 0; - for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; - ++I, ++OpIndex) { - const MCOperand &Op = MCI.getOperand(OpIndex); - if (!Op.isReg()) - continue; - - ReadDescriptor &Read = ID.Reads[CurrentUse]; - Read.OpIndex = OpIndex; - Read.UseIndex = I; - Read.SchedClassID = SchedClassID; - ++CurrentUse; - LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex - << ", UseIndex=" << Read.UseIndex << '\n'); - } - - // For the purpose of ReadAdvance, implicit uses come directly after explicit - // uses. The "UseIndex" must be updated according to that implicit layout. - for (unsigned I = 0; I < NumImplicitUses; ++I) { - ReadDescriptor &Read = ID.Reads[CurrentUse + I]; - Read.OpIndex = ~I; - Read.UseIndex = NumExplicitUses + I; - Read.RegisterID = MCDesc.getImplicitUses()[I]; - Read.SchedClassID = SchedClassID; - LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex - << ", UseIndex=" << Read.UseIndex << ", RegisterID=" - << MRI.getName(Read.RegisterID) << '\n'); - } - - CurrentUse += NumImplicitUses; - - // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no - // "unmodeledSideEffects", then this logic optimistically assumes that any - // extra register operands in the variadic sequence are not register - // definition. 
- - bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && - !MCDesc.hasUnmodeledSideEffects(); - for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); - I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { - const MCOperand &Op = MCI.getOperand(OpIndex); - if (!Op.isReg()) - continue; - - ReadDescriptor &Read = ID.Reads[CurrentUse]; - Read.OpIndex = OpIndex; - Read.UseIndex = NumExplicitUses + NumImplicitUses + I; - Read.SchedClassID = SchedClassID; - ++CurrentUse; - LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex - << ", UseIndex=" << Read.UseIndex << '\n'); - } - - ID.Reads.resize(CurrentUse); -} - -Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, - const MCInst &MCI) const { - if (ID.NumMicroOps != 0) - return ErrorSuccess(); - - bool UsesMemory = ID.MayLoad || ID.MayStore; - bool UsesBuffers = !ID.Buffers.empty(); - bool UsesResources = !ID.Resources.empty(); - if (!UsesMemory && !UsesBuffers && !UsesResources) - return ErrorSuccess(); - - StringRef Message; - if (UsesMemory) { - Message = "found an inconsistent instruction that decodes " - "into zero opcodes and that consumes load/store " - "unit resources."; - } else { - Message = "found an inconsistent instruction that decodes " - "to zero opcodes and that consumes scheduler " - "resources."; - } - - return make_error>(Message, MCI); -} - -Expected -InstrBuilder::createInstrDescImpl(const MCInst &MCI) { - assert(STI.getSchedModel().hasInstrSchedModel() && - "Itineraries are not yet supported!"); - - // Obtain the instruction descriptor from the opcode. - unsigned short Opcode = MCI.getOpcode(); - const MCInstrDesc &MCDesc = MCII.get(Opcode); - const MCSchedModel &SM = STI.getSchedModel(); - - // Then obtain the scheduling class information from the instruction. - unsigned SchedClassID = MCDesc.getSchedClass(); - bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); - - // Try to solve variant scheduling classes. 
- if (IsVariant) { - unsigned CPUID = SM.getProcessorID(); - while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) - SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); - - if (!SchedClassID) { - return make_error>( - "unable to resolve scheduling class for write variant.", MCI); - } - } - - // Check if this instruction is supported. Otherwise, report an error. - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { - return make_error>( - "found an unsupported instruction in the input assembly sequence.", - MCI); - } - - // Create a new empty descriptor. - std::unique_ptr ID = llvm::make_unique(); - ID->NumMicroOps = SCDesc.NumMicroOps; - - if (MCDesc.isCall() && FirstCallInst) { - // We don't correctly model calls. - WithColor::warning() << "found a call in the input assembly sequence.\n"; - WithColor::note() << "call instructions are not correctly modeled. " - << "Assume a latency of 100cy.\n"; - FirstCallInst = false; - } - - if (MCDesc.isReturn() && FirstReturnInst) { - WithColor::warning() << "found a return instruction in the input" - << " assembly sequence.\n"; - WithColor::note() << "program counter updates are ignored.\n"; - FirstReturnInst = false; - } - - ID->MayLoad = MCDesc.mayLoad(); - ID->MayStore = MCDesc.mayStore(); - ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); - - initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); - computeMaxLatency(*ID, MCDesc, SCDesc, STI); - - if (Error Err = verifyOperands(MCDesc, MCI)) - return std::move(Err); - - populateWrites(*ID, MCI, SchedClassID); - populateReads(*ID, MCI, SchedClassID); - - LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); - LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); - - // Sanity check on the instruction descriptor. 
- if (Error Err = verifyInstrDesc(*ID, MCI)) - return std::move(Err); - - // Now add the new descriptor. - SchedClassID = MCDesc.getSchedClass(); - bool IsVariadic = MCDesc.isVariadic(); - if (!IsVariadic && !IsVariant) { - Descriptors[MCI.getOpcode()] = std::move(ID); - return *Descriptors[MCI.getOpcode()]; - } - - VariantDescriptors[&MCI] = std::move(ID); - return *VariantDescriptors[&MCI]; -} - -Expected -InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { - if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) - return *Descriptors[MCI.getOpcode()]; - - if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) - return *VariantDescriptors[&MCI]; - - return createInstrDescImpl(MCI); -} - -Expected> -InstrBuilder::createInstruction(const MCInst &MCI) { - Expected DescOrErr = getOrCreateInstrDesc(MCI); - if (!DescOrErr) - return DescOrErr.takeError(); - const InstrDesc &D = *DescOrErr; - std::unique_ptr NewIS = llvm::make_unique(D); - - // Check if this is a dependency breaking instruction. - APInt Mask; - - unsigned ProcID = STI.getSchedModel().getProcessorID(); - bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID); - bool IsDepBreaking = - IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID); - if (MCIA.isOptimizableRegisterMove(MCI, ProcID)) - NewIS->setOptimizableMove(); - - // Initialize Reads first. - for (const ReadDescriptor &RD : D.Reads) { - int RegID = -1; - if (!RD.isImplicitRead()) { - // explicit read. - const MCOperand &Op = MCI.getOperand(RD.OpIndex); - // Skip non-register operands. - if (!Op.isReg()) - continue; - RegID = Op.getReg(); - } else { - // Implicit read. - RegID = RD.RegisterID; - } - - // Skip invalid register operands. - if (!RegID) - continue; - - // Okay, this is a register operand. Create a ReadState for it. 
- assert(RegID > 0 && "Invalid register ID found!"); - NewIS->getUses().emplace_back(RD, RegID); - ReadState &RS = NewIS->getUses().back(); - - if (IsDepBreaking) { - // A mask of all zeroes means: explicit input operands are not - // independent. - if (Mask.isNullValue()) { - if (!RD.isImplicitRead()) - RS.setIndependentFromDef(); - } else { - // Check if this register operand is independent according to `Mask`. - // Note that Mask may not have enough bits to describe all explicit and - // implicit input operands. If this register operand doesn't have a - // corresponding bit in Mask, then conservatively assume that it is - // dependent. - if (Mask.getBitWidth() > RD.UseIndex) { - // Okay. This map describe register use `RD.UseIndex`. - if (Mask[RD.UseIndex]) - RS.setIndependentFromDef(); - } - } - } - } - - // Early exit if there are no writes. - if (D.Writes.empty()) - return std::move(NewIS); - - // Track register writes that implicitly clear the upper portion of the - // underlying super-registers using an APInt. - APInt WriteMask(D.Writes.size(), 0); - - // Now query the MCInstrAnalysis object to obtain information about which - // register writes implicitly clear the upper portion of a super-register. - MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); - - // Initialize writes. - unsigned WriteIndex = 0; - for (const WriteDescriptor &WD : D.Writes) { - unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID - : MCI.getOperand(WD.OpIndex).getReg(); - // Check if this is a optional definition that references NoReg. 
- if (WD.IsOptionalDef && !RegID) { - ++WriteIndex; - continue; - } - - assert(RegID && "Expected a valid register ID!"); - NewIS->getDefs().emplace_back(WD, RegID, - /* ClearsSuperRegs */ WriteMask[WriteIndex], - /* WritesZero */ IsZeroIdiom); - ++WriteIndex; - } - - return std::move(NewIS); -} -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Instruction.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Instruction.cpp +++ llvm/trunk/tools/llvm-mca/lib/Instruction.cpp @@ -1,205 +0,0 @@ -//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines abstractions used by the Pipeline to model register reads, -// register writes and instructions. -// -//===----------------------------------------------------------------------===// - -#include "Instruction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace mca { - -void ReadState::writeStartEvent(unsigned Cycles) { - assert(DependentWrites); - assert(CyclesLeft == UNKNOWN_CYCLES); - - // This read may be dependent on more than one write. This typically occurs - // when a definition is the result of multiple writes where at least one - // write does a partial register update. - // The HW is forced to do some extra bookkeeping to track of all the - // dependent writes, and implement a merging scheme for the partial writes. 
- --DependentWrites; - TotalCycles = std::max(TotalCycles, Cycles); - - if (!DependentWrites) { - CyclesLeft = TotalCycles; - IsReady = !CyclesLeft; - } -} - -void WriteState::onInstructionIssued() { - assert(CyclesLeft == UNKNOWN_CYCLES); - // Update the number of cycles left based on the WriteDescriptor info. - CyclesLeft = getLatency(); - - // Now that the time left before write-back is known, notify - // all the users. - for (const std::pair &User : Users) { - ReadState *RS = User.first; - unsigned ReadCycles = std::max(0, CyclesLeft - User.second); - RS->writeStartEvent(ReadCycles); - } - - // Notify any writes that are in a false dependency with this write. - if (PartialWrite) - PartialWrite->writeStartEvent(CyclesLeft); -} - -void WriteState::addUser(ReadState *User, int ReadAdvance) { - // If CyclesLeft is different than -1, then we don't need to - // update the list of users. We can just notify the user with - // the actual number of cycles left (which may be zero). - if (CyclesLeft != UNKNOWN_CYCLES) { - unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); - User->writeStartEvent(ReadCycles); - return; - } - - if (llvm::find_if(Users, [&User](const std::pair &Use) { - return Use.first == User; - }) == Users.end()) { - Users.emplace_back(User, ReadAdvance); - } -} - -void WriteState::addUser(WriteState *User) { - if (CyclesLeft != UNKNOWN_CYCLES) { - User->writeStartEvent(std::max(0, CyclesLeft)); - return; - } - - assert(!PartialWrite && "PartialWrite already set!"); - PartialWrite = User; - User->setDependentWrite(this); -} - -void WriteState::cycleEvent() { - // Note: CyclesLeft can be a negative number. It is an error to - // make it an unsigned quantity because users of this write may - // specify a negative ReadAdvance. - if (CyclesLeft != UNKNOWN_CYCLES) - CyclesLeft--; - - if (DependentWriteCyclesLeft) - DependentWriteCyclesLeft--; -} - -void ReadState::cycleEvent() { - // Update the total number of cycles. 
- if (DependentWrites && TotalCycles) { - --TotalCycles; - return; - } - - // Bail out immediately if we don't know how many cycles are left. - if (CyclesLeft == UNKNOWN_CYCLES) - return; - - if (CyclesLeft) { - --CyclesLeft; - IsReady = !CyclesLeft; - } -} - -#ifndef NDEBUG -void WriteState::dump() const { - dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID " - << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; -} - -void WriteRef::dump() const { - dbgs() << "IID=" << getSourceIndex() << ' '; - if (isValid()) - getWriteState()->dump(); - else - dbgs() << "(null)"; -} -#endif - -void Instruction::dispatch(unsigned RCUToken) { - assert(Stage == IS_INVALID); - Stage = IS_AVAILABLE; - RCUTokenID = RCUToken; - - // Check if input operands are already available. - update(); -} - -void Instruction::execute() { - assert(Stage == IS_READY); - Stage = IS_EXECUTING; - - // Set the cycles left before the write-back stage. - CyclesLeft = getLatency(); - - for (WriteState &WS : getDefs()) - WS.onInstructionIssued(); - - // Transition to the "executed" stage if this is a zero-latency instruction. - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -void Instruction::forceExecuted() { - assert(Stage == IS_READY && "Invalid internal state!"); - CyclesLeft = 0; - Stage = IS_EXECUTED; -} - -void Instruction::update() { - assert(isDispatched() && "Unexpected instruction stage found!"); - - if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); })) - return; - - // A partial register write cannot complete before a dependent write. 
- auto IsDefReady = [&](const WriteState &Def) { - if (!Def.getDependentWrite()) { - unsigned CyclesLeft = Def.getDependentWriteCyclesLeft(); - return !CyclesLeft || CyclesLeft < getLatency(); - } - return false; - }; - - if (all_of(getDefs(), IsDefReady)) - Stage = IS_READY; -} - -void Instruction::cycleEvent() { - if (isReady()) - return; - - if (isDispatched()) { - for (ReadState &Use : getUses()) - Use.cycleEvent(); - - for (WriteState &Def : getDefs()) - Def.cycleEvent(); - - update(); - return; - } - - assert(isExecuting() && "Instruction not in-flight?"); - assert(CyclesLeft && "Instruction already executed?"); - for (WriteState &Def : getDefs()) - Def.cycleEvent(); - CyclesLeft--; - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -const unsigned WriteRef::INVALID_IID = std::numeric_limits::max(); - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt =================================================================== --- llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt +++ llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt @@ -1,22 +0,0 @@ -;===- ./tools/llvm-mca/lib/LLVMBuild.txt -----------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = MCA -parent = Libraries -required_libraries = MC Support Index: llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp +++ llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp @@ -1,97 +0,0 @@ -//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements an ordered container of stages that simulate the -/// pipeline of a hardware backend. -/// -//===----------------------------------------------------------------------===// - -#include "Pipeline.h" -#include "HWEventListener.h" -#include "llvm/Support/Debug.h" - -namespace llvm { -namespace mca { - -#define DEBUG_TYPE "llvm-mca" - -void Pipeline::addEventListener(HWEventListener *Listener) { - if (Listener) - Listeners.insert(Listener); - for (auto &S : Stages) - S->addListener(Listener); -} - -bool Pipeline::hasWorkToProcess() { - return any_of(Stages, [](const std::unique_ptr &S) { - return S->hasWorkToComplete(); - }); -} - -Expected Pipeline::run() { - assert(!Stages.empty() && "Unexpected empty pipeline found!"); - - do { - notifyCycleBegin(); - if (Error Err = runCycle()) - return std::move(Err); - notifyCycleEnd(); - ++Cycles; - } while (hasWorkToProcess()); - - return Cycles; -} - -Error Pipeline::runCycle() { - Error Err = ErrorSuccess(); - // Update stages before we start processing new instructions. 
- for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr &S = *I; - Err = S->cycleStart(); - } - - // Now fetch and execute new instructions. - InstRef IR; - Stage &FirstStage = *Stages[0]; - while (!Err && FirstStage.isAvailable(IR)) - Err = FirstStage.execute(IR); - - // Update stages in preparation for a new cycle. - for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr &S = *I; - Err = S->cycleEnd(); - } - - return Err; -} - -void Pipeline::appendStage(std::unique_ptr S) { - assert(S && "Invalid null stage in input!"); - if (!Stages.empty()) { - Stage *Last = Stages.back().get(); - Last->setNextInSequence(S.get()); - } - - Stages.push_back(std::move(S)); -} - -void Pipeline::notifyCycleBegin() { - LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); - for (HWEventListener *Listener : Listeners) - Listener->onCycleBegin(); -} - -void Pipeline::notifyCycleEnd() { - LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); - for (HWEventListener *Listener : Listeners) - Listener->onCycleEnd(); -} -} // namespace mca. -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp @@ -1,185 +0,0 @@ -//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file models the dispatch component of an instruction pipeline. 
-/// -/// The DispatchStage is responsible for updating instruction dependencies -/// and communicating to the simulated instruction scheduler that an instruction -/// is ready to be scheduled for execution. -/// -//===----------------------------------------------------------------------===// - -#include "Stages/DispatchStage.h" -#include "HWEventListener.h" -#include "HardwareUnits/Scheduler.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -void DispatchStage::notifyInstructionDispatched(const InstRef &IR, - ArrayRef UsedRegs, - unsigned UOps) const { - LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); - notifyEvent( - HWInstructionDispatchedEvent(IR, UsedRegs, UOps)); -} - -bool DispatchStage::checkPRF(const InstRef &IR) const { - SmallVector RegDefs; - for (const WriteState &RegDef : IR.getInstruction()->getDefs()) - RegDefs.emplace_back(RegDef.getRegisterID()); - - const unsigned RegisterMask = PRF.isAvailable(RegDefs); - // A mask with all zeroes means: register files are available. - if (RegisterMask) { - notifyEvent( - HWStallEvent(HWStallEvent::RegisterFileStall, IR)); - return false; - } - - return true; -} - -bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; - if (RCU.isAvailable(NumMicroOps)) - return true; - notifyEvent( - HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); - return false; -} - -bool DispatchStage::canDispatch(const InstRef &IR) const { - return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); -} - -void DispatchStage::updateRAWDependencies(ReadState &RS, - const MCSubtargetInfo &STI) { - SmallVector DependentWrites; - - // Collect all the dependent writes, and update RS internal state. - PRF.addRegisterRead(RS, DependentWrites); - - // We know that this read depends on all the writes in DependentWrites. 
- // For each write, check if we have ReadAdvance information, and use it - // to figure out in how many cycles this read becomes available. - const ReadDescriptor &RD = RS.getDescriptor(); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); - for (WriteRef &WR : DependentWrites) { - WriteState &WS = *WR.getWriteState(); - unsigned WriteResID = WS.getWriteResourceID(); - int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); - WS.addUser(&RS, ReadAdvance); - } -} - -Error DispatchStage::dispatch(InstRef IR) { - assert(!CarryOver && "Cannot dispatch another instruction!"); - Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; - if (NumMicroOps > DispatchWidth) { - assert(AvailableEntries == DispatchWidth); - AvailableEntries = 0; - CarryOver = NumMicroOps - DispatchWidth; - CarriedOver = IR; - } else { - assert(AvailableEntries >= NumMicroOps); - AvailableEntries -= NumMicroOps; - } - - // Check if this is an optimizable reg-reg move. - bool IsEliminated = false; - if (IS.isOptimizableMove()) { - assert(IS.getDefs().size() == 1 && "Expected a single input!"); - assert(IS.getUses().size() == 1 && "Expected a single output!"); - IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]); - } - - // A dependency-breaking instruction doesn't have to wait on the register - // input operands, and it is often optimized at register renaming stage. - // Update RAW dependencies if this instruction is not a dependency-breaking - // instruction. A dependency-breaking instruction is a zero-latency - // instruction that doesn't consume hardware resources. - // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. - // - // We also don't update data dependencies for instructions that have been - // eliminated at register renaming stage. 
- if (!IsEliminated) { - for (ReadState &RS : IS.getUses()) - updateRAWDependencies(RS, STI); - } - - // By default, a dependency-breaking zero-idiom is expected to be optimized - // at register renaming stage. That means, no physical register is allocated - // to the instruction. - SmallVector RegisterFiles(PRF.getNumRegisterFiles()); - for (WriteState &WS : IS.getDefs()) - PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); - - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); - - // Notify listeners of the "instruction dispatched" event, - // and move IR to the next stage. - notifyInstructionDispatched(IR, RegisterFiles, - std::min(DispatchWidth, NumMicroOps)); - return moveToTheNextStage(IR); -} - -Error DispatchStage::cycleStart() { - PRF.cycleStart(); - - if (!CarryOver) { - AvailableEntries = DispatchWidth; - return ErrorSuccess(); - } - - AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; - unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries; - CarryOver -= DispatchedOpcodes; - assert(CarriedOver && "Invalid dispatched instruction"); - - SmallVector RegisterFiles(PRF.getNumRegisterFiles(), 0U); - notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes); - if (!CarryOver) - CarriedOver = InstRef(); - return ErrorSuccess(); -} - -bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); - if (Required > AvailableEntries) - return false; - // The dispatch logic doesn't internally buffer instructions. It only accepts - // instructions that can be successfully moved to the next stage during this - // same cycle. 
- return canDispatch(IR); -} - -Error DispatchStage::execute(InstRef &IR) { - assert(canDispatch(IR) && "Cannot dispatch another instruction!"); - return dispatch(IR); -} - -#ifndef NDEBUG -void DispatchStage::dump() const { - PRF.dump(); - RCU.dump(); -} -#endif -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Stages/EntryStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/EntryStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/EntryStage.cpp @@ -1,76 +0,0 @@ -//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the Fetch stage of an instruction pipeline. Its sole -/// purpose in life is to produce instructions for the rest of the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "Stages/EntryStage.h" -#include "Instruction.h" - -namespace llvm { -namespace mca { - -bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; } - -bool EntryStage::isAvailable(const InstRef & /* unused */) const { - if (CurrentInstruction) - return checkNextStage(CurrentInstruction); - return false; -} - -void EntryStage::getNextInstruction() { - assert(!CurrentInstruction && "There is already an instruction to process!"); - if (!SM.hasNext()) - return; - SourceRef SR = SM.peekNext(); - std::unique_ptr Inst = llvm::make_unique(SR.second); - CurrentInstruction = InstRef(SR.first, Inst.get()); - Instructions.emplace_back(std::move(Inst)); - SM.updateNext(); -} - -llvm::Error EntryStage::execute(InstRef & /*unused */) { - assert(CurrentInstruction && "There is no instruction to process!"); - if (llvm::Error Val = moveToTheNextStage(CurrentInstruction)) - return Val; - - // Move the program counter. - CurrentInstruction.invalidate(); - getNextInstruction(); - return llvm::ErrorSuccess(); -} - -llvm::Error EntryStage::cycleStart() { - if (!CurrentInstruction) - getNextInstruction(); - return llvm::ErrorSuccess(); -} - -llvm::Error EntryStage::cycleEnd() { - // Find the first instruction which hasn't been retired. - auto Range = make_range(&Instructions[NumRetired], Instructions.end()); - auto It = find_if(Range, [](const std::unique_ptr &I) { - return !I->isRetired(); - }); - - NumRetired = std::distance(Instructions.begin(), It); - // Erase instructions up to the first that hasn't been retired. 
- if ((NumRetired * 2) >= Instructions.size()) { - Instructions.erase(Instructions.begin(), It); - NumRetired = 0; - } - - return llvm::ErrorSuccess(); -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp @@ -1,219 +0,0 @@ -//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the execution stage of an instruction pipeline. -/// -/// The ExecuteStage is responsible for managing the hardware scheduler -/// and issuing notifications that an instruction has been executed. 
-/// -//===----------------------------------------------------------------------===// - -#include "Stages/ExecuteStage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { - switch (Status) { - case Scheduler::SC_LOAD_QUEUE_FULL: - return HWStallEvent::LoadQueueFull; - case Scheduler::SC_STORE_QUEUE_FULL: - return HWStallEvent::StoreQueueFull; - case Scheduler::SC_BUFFERS_FULL: - return HWStallEvent::SchedulerQueueFull; - case Scheduler::SC_DISPATCH_GROUP_STALL: - return HWStallEvent::DispatchGroupStall; - case Scheduler::SC_AVAILABLE: - return HWStallEvent::Invalid; - } - - llvm_unreachable("Don't know how to process this StallKind!"); -} - -bool ExecuteStage::isAvailable(const InstRef &IR) const { - if (Scheduler::Status S = HWS.isAvailable(IR)) { - HWStallEvent::GenericEventType ET = toHWStallEventType(S); - notifyEvent(HWStallEvent(ET, IR)); - return false; - } - - return true; -} - -Error ExecuteStage::issueInstruction(InstRef &IR) { - SmallVector, 4> Used; - SmallVector Ready; - HWS.issueInstruction(IR, Used, Ready); - - notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); - notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { - notifyInstructionExecuted(IR); - // FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &I : Ready) - notifyInstructionReady(I); - return ErrorSuccess(); -} - -Error ExecuteStage::issueReadyInstructions() { - InstRef IR = HWS.select(); - while (IR) { - if (Error Err = issueInstruction(IR)) - return Err; - - // Select the next instruction to issue. 
- IR = HWS.select(); - } - - return ErrorSuccess(); -} - -Error ExecuteStage::cycleStart() { - SmallVector Freed; - SmallVector Executed; - SmallVector Ready; - - HWS.cycleEvent(Freed, Executed, Ready); - - for (const ResourceRef &RR : Freed) - notifyResourceAvailable(RR); - - for (InstRef &IR : Executed) { - notifyInstructionExecuted(IR); - // FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &IR : Ready) - notifyInstructionReady(IR); - - return issueReadyInstructions(); -} - -#ifndef NDEBUG -static void verifyInstructionEliminated(const InstRef &IR) { - const Instruction &Inst = *IR.getInstruction(); - assert(Inst.isEliminated() && "Instruction was not eliminated!"); - assert(Inst.isReady() && "Instruction in an inconsistent state!"); - - // Ensure that instructions eliminated at register renaming stage are in a - // consistent state. - const InstrDesc &Desc = Inst.getDesc(); - assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!"); -} -#endif - -Error ExecuteStage::handleInstructionEliminated(InstRef &IR) { -#ifndef NDEBUG - verifyInstructionEliminated(IR); -#endif - notifyInstructionReady(IR); - notifyInstructionIssued(IR, {}); - IR.getInstruction()->forceExecuted(); - notifyInstructionExecuted(IR); - return moveToTheNextStage(IR); -} - -// Schedule the instruction for execution on the hardware. -Error ExecuteStage::execute(InstRef &IR) { - assert(isAvailable(IR) && "Scheduler is not available!"); - -#ifndef NDEBUG - // Ensure that the HWS has not stored this instruction in its queues. - HWS.sanityCheck(IR); -#endif - - if (IR.getInstruction()->isEliminated()) - return handleInstructionEliminated(IR); - - // Reserve a slot in each buffered resource. Also, mark units with - // BufferSize=0 as reserved. Resources with a buffer size of zero will only - // be released after MCIS is issued, and all the ResourceCycles for those - // units have been consumed. 
- HWS.dispatch(IR); - notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); - if (!HWS.isReady(IR)) - return ErrorSuccess(); - - // If we did not return early, then the scheduler is ready for execution. - notifyInstructionReady(IR); - - // If we cannot issue immediately, the HWS will add IR to its ready queue for - // execution later, so we must return early here. - if (!HWS.mustIssueImmediately(IR)) - return ErrorSuccess(); - - // Issue IR to the underlying pipelines. - return issueInstruction(IR); -} - -void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const { - LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); - notifyEvent( - HWInstructionEvent(HWInstructionEvent::Executed, IR)); -} - -void ExecuteStage::notifyInstructionReady(const InstRef &IR) const { - LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); - notifyEvent( - HWInstructionEvent(HWInstructionEvent::Ready, IR)); -} - -void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const { - LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' - << RR.second << "]\n"); - for (HWEventListener *Listener : getListeners()) - Listener->onResourceAvailable(RR); -} - -void ExecuteStage::notifyInstructionIssued( - const InstRef &IR, - ArrayRef> Used) const { - LLVM_DEBUG({ - dbgs() << "[E] Instruction Issued: #" << IR << '\n'; - for (const std::pair &Resource : Used) { - dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' 
- << Resource.first.second << "], "; - dbgs() << "cycles: " << Resource.second << '\n'; - } - }); - notifyEvent(HWInstructionIssuedEvent(IR, Used)); -} - -void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, - bool Reserved) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.Buffers.empty()) - return; - - SmallVector BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); - std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); - if (Reserved) { - for (HWEventListener *Listener : getListeners()) - Listener->onReservedBuffers(IR, BufferIDs); - return; - } - - for (HWEventListener *Listener : getListeners()) - Listener->onReleasedBuffers(IR, BufferIDs); -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp @@ -1,69 +0,0 @@ -//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the method InstructionTables::execute(). -/// Method execute() prints a theoretical resource pressure distribution based -/// on the information available in the scheduling model, and without running -/// the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "Stages/InstructionTables.h" - -namespace llvm { -namespace mca { - -Error InstructionTables::execute(InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - UsedResources.clear(); - - // Identify the resources consumed by this instruction. - for (const std::pair Resource : Desc.Resources) { - // Skip zero-cycle resources (i.e., unused resources). - if (!Resource.second.size()) - continue; - unsigned Cycles = Resource.second.size(); - unsigned Index = std::distance( - Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first)); - const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index); - unsigned NumUnits = ProcResource.NumUnits; - if (!ProcResource.SubUnitsIdxBegin) { - // The number of cycles consumed by each unit. - for (unsigned I = 0, E = NumUnits; I < E; ++I) { - ResourceRef ResourceUnit = std::make_pair(Index, 1U << I); - UsedResources.emplace_back( - std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits))); - } - continue; - } - - // This is a group. Obtain the set of resources contained in this - // group. Some of these resources may implement multiple units. - // Uniformly distribute Cycles across all of the units. - for (unsigned I1 = 0; I1 < NumUnits; ++I1) { - unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1]; - const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx); - // Compute the number of cycles consumed by each resource unit. - for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) { - ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2); - UsedResources.emplace_back(std::make_pair( - ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits))); - } - } - } - - // Send a fake instruction issued event to all the views. 
- HWInstructionIssuedEvent Event(IR, UsedResources); - notifyEvent(Event); - return ErrorSuccess(); -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp @@ -1,62 +0,0 @@ -//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the retire stage of an instruction pipeline. -/// The RetireStage represents the process logic that interacts with the -/// simulated RetireControlUnit hardware. -/// -//===----------------------------------------------------------------------===// - -#include "Stages/RetireStage.h" -#include "HWEventListener.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -llvm::Error RetireStage::cycleStart() { - if (RCU.isEmpty()) - return llvm::ErrorSuccess(); - - const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); - unsigned NumRetired = 0; - while (!RCU.isEmpty()) { - if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) - break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); - if (!Current.Executed) - break; - RCU.consumeCurrentToken(); - notifyInstructionRetired(Current.IR); - NumRetired++; - } - - return llvm::ErrorSuccess(); -} - -llvm::Error RetireStage::execute(InstRef &IR) { - RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); - return llvm::ErrorSuccess(); -} - -void RetireStage::notifyInstructionRetired(const InstRef &IR) const { - LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR 
<< '\n'); - llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); - const Instruction &Inst = *IR.getInstruction(); - - for (const WriteState &WS : Inst.getDefs()) - PRF.removeRegisterWrite(WS, FreedRegs); - notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp @@ -1,29 +0,0 @@ -//===---------------------- Stage.cpp ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a stage. -/// A chain of stages compose an instruction pipeline. -/// -//===----------------------------------------------------------------------===// - -#include "Stages/Stage.h" - -namespace llvm { -namespace mca { - -// Pin the vtable here in the implementation file. -Stage::~Stage() = default; - -void Stage::addListener(HWEventListener *Listener) { - Listeners.insert(Listener); -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/lib/Support.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Support.cpp +++ llvm/trunk/tools/llvm-mca/lib/Support.cpp @@ -1,79 +0,0 @@ -//===--------------------- Support.cpp --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements a few helper functions used by various pipeline -/// components. -/// -//===----------------------------------------------------------------------===// - -#include "Support.h" -#include "llvm/MC/MCSchedule.h" - -namespace llvm { -namespace mca { - -void computeProcResourceMasks(const MCSchedModel &SM, - SmallVectorImpl &Masks) { - unsigned ProcResourceID = 0; - - // Create a unique bitmask for every processor resource unit. - // Skip resource at index 0, since it always references 'InvalidUnit'. - Masks.resize(SM.getNumProcResourceKinds()); - for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { - const MCProcResourceDesc &Desc = *SM.getProcResource(I); - if (Desc.SubUnitsIdxBegin) - continue; - Masks[I] = 1ULL << ProcResourceID; - ProcResourceID++; - } - - // Create a unique bitmask for every processor resource group. - for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { - const MCProcResourceDesc &Desc = *SM.getProcResource(I); - if (!Desc.SubUnitsIdxBegin) - continue; - Masks[I] = 1ULL << ProcResourceID; - for (unsigned U = 0; U < Desc.NumUnits; ++U) { - uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]]; - Masks[I] |= OtherMask; - } - ProcResourceID++; - } -} - -double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, - unsigned NumMicroOps, - ArrayRef ProcResourceUsage) { - // The block throughput is bounded from above by the hardware dispatch - // throughput. That is because the DispatchWidth is an upper bound on the - // number of opcodes that can be part of a single dispatch group. - double Max = static_cast(NumMicroOps) / DispatchWidth; - - // The block throughput is also limited by the amount of hardware parallelism. - // The number of available resource units affects the resource pressure - // distribution, as well as how many blocks can be executed every cycle. 
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - unsigned ResourceCycles = ProcResourceUsage[I]; - if (!ResourceCycles) - continue; - - const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); - double Throughput = static_cast(ResourceCycles) / MCDesc.NumUnits; - Max = std::max(Max, Throughput); - } - - // The block reciprocal throughput is computed as the MAX of: - // - (NumMicroOps / DispatchWidth) - // - (NumUnits / ResourceCycles) for every consumed processor resource. - return Max; -} - -} // namespace mca -} // namespace llvm Index: llvm/trunk/tools/llvm-mca/llvm-mca.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/llvm-mca.cpp +++ llvm/trunk/tools/llvm-mca/llvm-mca.cpp @@ -24,8 +24,6 @@ #include "CodeRegion.h" #include "CodeRegionGenerator.h" #include "PipelinePrinter.h" -#include "Stages/EntryStage.h" -#include "Stages/InstructionTables.h" #include "Views/DispatchStatistics.h" #include "Views/InstructionInfoView.h" #include "Views/RegisterFileStatistics.h" @@ -34,13 +32,15 @@ #include "Views/SchedulerStatistics.h" #include "Views/SummaryView.h" #include "Views/TimelineView.h" -#include "include/Context.h" -#include "include/Pipeline.h" -#include "include/Support.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MCA/Context.h" +#include "llvm/MCA/Pipeline.h" +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Stages/InstructionTables.h" +#include "llvm/MCA/Support.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h"