Index: llvm/tools/llvm-mca/include/Context.h =================================================================== --- llvm/tools/llvm-mca/include/Context.h +++ llvm/tools/llvm-mca/include/Context.h @@ -23,6 +23,7 @@ #include "SourceMgr.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include namespace mca { @@ -30,10 +31,11 @@ /// This is a convenience struct to hold the parameters necessary for creating /// the pre-built "default" out-of-order pipeline. struct PipelineOptions { - PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS, - bool NoAlias) - : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS), - StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} + PipelineOptions(unsigned MBPC, unsigned DW, unsigned RFS, unsigned LQS, + unsigned SQS, bool NoAlias) + : MaxBytesFetchedPerCycle(MBPC), DispatchWidth(DW), RegisterFileSize(RFS), + LoadQueueSize(LQS), StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} + unsigned MaxBytesFetchedPerCycle; unsigned DispatchWidth; unsigned RegisterFileSize; unsigned LoadQueueSize; @@ -45,10 +47,12 @@ llvm::SmallVector, 4> Hardware; const llvm::MCRegisterInfo &MRI; const llvm::MCSubtargetInfo &STI; + const llvm::MCCodeEmitter &MCE; public: - Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S) - : MRI(R), STI(S) {} + Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S, + const llvm::MCCodeEmitter &MCE) + : MRI(R), STI(S), MCE(MCE) {} Context(const Context &C) = delete; Context &operator=(const Context &C) = delete; Index: llvm/tools/llvm-mca/include/Stages/FetchStage.h =================================================================== --- llvm/tools/llvm-mca/include/Stages/FetchStage.h +++ llvm/tools/llvm-mca/include/Stages/FetchStage.h @@ -19,6 +19,7 @@ #include "InstrBuilder.h" #include "SourceMgr.h" #include "Stages/Stage.h" +#include "llvm/MC/MCCodeEmitter.h" #include namespace mca { @@ -29,16 +30,29 @@ InstMap 
Instructions; InstrBuilder &IB; SourceMgr &SM; + const llvm::MCSubtargetInfo &STI; + const llvm::MCCodeEmitter &MCE; + // Per-cycle fetch limit in bytes; zero disables the limit. + int MaxBytesFetchedPerCycle; + // Bytes charged against the limit this cycle; zeroed by cycleStart(). + int BytesFetchedThisCycle = 0; // Updates the program counter, and sets 'CurrentInstruction'. llvm::Error getNextInstruction(); + // Returns the number of bytes MCI occupies once encoded by MCE. + int getEncodedLength(const llvm::MCInst &MCI); + FetchStage(const FetchStage &Other) = delete; FetchStage &operator=(const FetchStage &Other) = delete; public: - FetchStage(InstrBuilder &IB, SourceMgr &SM) - : CurrentInstruction(), IB(IB), SM(SM) {} + FetchStage(InstrBuilder &IB, SourceMgr &SM, + const llvm::MCSubtargetInfo &STI, + const llvm::MCCodeEmitter &MCE, + int MaxBytesFetchedPerCycle) + : CurrentInstruction(), IB(IB), SM(SM), STI(STI), MCE(MCE), + MaxBytesFetchedPerCycle(MaxBytesFetchedPerCycle) {} bool isAvailable(const InstRef &IR) const override; bool hasWorkToComplete() const override; Index: llvm/tools/llvm-mca/lib/Context.cpp =================================================================== --- llvm/tools/llvm-mca/lib/Context.cpp +++ llvm/tools/llvm-mca/lib/Context.cpp @@ -41,7 +41,8 @@ auto HWS = llvm::make_unique(SM, LSU.get()); // Create the pipeline stages. 
- auto Fetch = llvm::make_unique(IB, SrcMgr); + auto Fetch = llvm::make_unique(IB, SrcMgr, STI, MCE, + Opts.MaxBytesFetchedPerCycle); auto Dispatch = llvm::make_unique(STI, MRI, Opts.DispatchWidth, *RCU, *PRF); auto Execute = llvm::make_unique(*HWS); Index: llvm/tools/llvm-mca/lib/Stages/FetchStage.cpp =================================================================== --- llvm/tools/llvm-mca/lib/Stages/FetchStage.cpp +++ llvm/tools/llvm-mca/lib/Stages/FetchStage.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "Stages/FetchStage.h" +#include "llvm/MC/MCFixup.h" namespace mca { @@ -35,6 +36,25 @@ if (!SM.hasNext()) return llvm::ErrorSuccess(); const SourceRef SR = SM.peekNext(); + + // Limit the number of bytes that can be fetched in one cycle. For example, + // see the Intel Optimization Reference Manual section 2.5.2.2 ("Instruction + // Fetch Unit"), which documents a 16 byte limit per fetch. + if (MaxBytesFetchedPerCycle) { + int EncodedLength = getEncodedLength(*SR.second); + // An instruction wider than the limit could never be fetched, so report it + // as an error in every build mode instead of asserting. + if (EncodedLength > MaxBytesFetchedPerCycle) + return llvm::make_error<llvm::StringError>( + "Instruction larger than maximum fetch size!", + llvm::inconvertibleErrorCode()); + + // Leave the instruction for a later cycle if it overflows this cycle's limit. + BytesFetchedThisCycle += EncodedLength; + if (BytesFetchedThisCycle > MaxBytesFetchedPerCycle) + return llvm::ErrorSuccess(); + } + llvm::Expected> InstOrErr = IB.createInstruction(*SR.second); if (!InstOrErr) @@ -43,6 +63,25 @@ return llvm::ErrorSuccess(); } +// Output stream that discards what is written to it and only counts the bytes. +class length_counting_ostream : public llvm::raw_ostream { + public: + int CurrentPos = 0; + + length_counting_ostream() : llvm::raw_ostream(/*Unbuffered=*/ true) {} + + void write_impl(const char *, size_t Size) override { CurrentPos += Size; } + uint64_t current_pos() const override { return CurrentPos; } +}; + +// Encodes MCI into a counting stream and returns the number of bytes emitted. +int FetchStage::getEncodedLength(const llvm::MCInst &MCI) { + length_counting_ostream LCOS; + llvm::SmallVector Fixups; + MCE.encodeInstruction(MCI, LCOS, Fixups, STI); + return LCOS.current_pos(); +} + llvm::Error FetchStage::execute(InstRef & /*unused */) 
{ assert(CurrentInstruction && "There is no instruction to process!"); const SourceRef SR = SM.peekNext(); @@ -60,6 +99,7 @@ } llvm::Error FetchStage::cycleStart() { + BytesFetchedThisCycle = 0; if (!CurrentInstruction) return getNextInstruction(); return llvm::ErrorSuccess(); Index: llvm/tools/llvm-mca/llvm-mca.cpp =================================================================== --- llvm/tools/llvm-mca/llvm-mca.cpp +++ llvm/tools/llvm-mca/llvm-mca.cpp @@ -91,6 +91,13 @@ cl::desc("Number of iterations to run"), cl::cat(ToolOptions), cl::init(0)); +static cl::opt + MaxBytesFetchedPerCycle("bytes-per-cycle", + cl::desc("The maximum number of bytes worth of " + "instructions that can be fetched in one " + "cycle."), + cl::cat(ToolOptions), cl::init(0)); + static cl::opt DispatchWidth("dispatch", cl::desc("Override the processor dispatch width"), cl::cat(ToolOptions), cl::init(0)); @@ -371,6 +378,8 @@ // Tell SrcMgr about this buffer, which is what the parser will pick up. SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc()); + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); assert(MRI && "Unable to create target register info!"); @@ -379,6 +388,12 @@ MCObjectFileInfo MOFI; MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + + // Take ownership of the emitter so it is released on every exit path. + std::unique_ptr<MCCodeEmitter> MCE( + TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + assert(MCE && "Unable to create target code emitter!"); + MOFI.InitMCObjectFileInfo(TheTriple, /* PIC= */ false, Ctx); std::unique_ptr BOS; @@ -386,8 +401,6 @@ mca::CodeRegions Regions(SrcMgr); MCStreamerWrapper Str(Ctx, Regions); - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr MCIA( TheTarget->createMCInstrAnalysis(MCII.get())); @@ -463,10 +476,10 @@ mca::InstrBuilder IB(*STI, *MCII, *MRI, *MCIA, *IP); // Create a context to control ownership of the pipeline hardware. 
- mca::Context MCA(*MRI, *STI); + mca::Context MCA(*MRI, *STI, *MCE); - mca::PipelineOptions PO(Width, RegisterFileSize, LoadQueueSize, - StoreQueueSize, AssumeNoAlias); + mca::PipelineOptions PO(MaxBytesFetchedPerCycle, Width, RegisterFileSize, + LoadQueueSize, StoreQueueSize, AssumeNoAlias); // Number each region in the sequence. unsigned RegionIdx = 0; @@ -491,7 +504,8 @@ if (PrintInstructionTables) { // Create a pipeline, stages, and a printer. auto P = llvm::make_unique(); - P->appendStage(llvm::make_unique(IB, S)); + P->appendStage(llvm::make_unique( + IB, S, *STI, *MCE, MaxBytesFetchedPerCycle)); P->appendStage(llvm::make_unique(SM, IB)); mca::PipelinePrinter Printer(*P);