Index: llvm/include/llvm/MCA/IncrementalSourceMgr.h =================================================================== --- llvm/include/llvm/MCA/IncrementalSourceMgr.h +++ llvm/include/llvm/MCA/IncrementalSourceMgr.h @@ -30,40 +30,60 @@ /// there is a large number of instructions. std::deque InstStorage; + /// Instructions that are ready to be used. Each of them is a pointer of an + /// \a UniqueInst inside InstStorage. + std::deque Staging; + /// Current instruction index. unsigned TotalCounter; /// End-of-stream flag. bool EOS; + /// Called when an instruction is no longer needed. + using InstFreedCallback = llvm::function_ref; + InstFreedCallback InstFreedCB; + public: IncrementalSourceMgr() : TotalCounter(0U), EOS(false) {} - void clear() { - InstStorage.clear(); - TotalCounter = 0U; - EOS = false; - } + void clear(); + + /// Set a callback that is invoked when a mca::Instruction is + /// no longer needed. This is usually used for recycling the + /// instruction. + void setOnInstFreedCallback(InstFreedCallback CB) { InstFreedCB = CB; } ArrayRef getInstructions() const override { llvm_unreachable("Not applicable"); } - bool hasNext() const override { return TotalCounter < InstStorage.size(); } + bool hasNext() const override { return !Staging.empty(); } bool isEnd() const override { return EOS; } SourceRef peekNext() const override { assert(hasNext()); - return SourceRef(TotalCounter, *InstStorage[TotalCounter]); + return SourceRef(TotalCounter, *Staging.front()); } /// Add a new instruction. - void addInst(UniqueInst &&Inst) { InstStorage.emplace_back(std::move(Inst)); } + void addInst(UniqueInst &&Inst) { + InstStorage.emplace_back(std::move(Inst)); + Staging.push_back(InstStorage.back().get()); + } + + /// Add a recycled instruction. + void addRecycledInst(Instruction *Inst) { Staging.push_back(Inst); } - void updateNext() override { ++TotalCounter; } + void updateNext() override; /// Mark the end of instruction stream. void endOfStream() { EOS = true; } + +#ifndef NDEBUG + /// Print statistic about instruction recycling stats. + void printStatistic(raw_ostream &OS); +#endif }; } // end namespace mca Index: llvm/include/llvm/MCA/InstrBuilder.h =================================================================== --- llvm/include/llvm/MCA/InstrBuilder.h +++ llvm/include/llvm/MCA/InstrBuilder.h @@ -14,6 +14,7 @@ #ifndef LLVM_MCA_INSTRBUILDER_H #define LLVM_MCA_INSTRBUILDER_H +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -25,6 +26,27 @@ namespace llvm { namespace mca { +class RecycledInstErr : public ErrorInfo { + Instruction *RecycledInst; + +public: + static char ID; + + explicit RecycledInstErr(Instruction *Inst) : RecycledInst(Inst) {} + // Always need to carry an Instruction + RecycledInstErr() = delete; + + Instruction *getInst() const { return RecycledInst; } + + void log(raw_ostream &OS) const override { + OS << "Instruction is recycled\n"; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + /// A builder class that knows how to construct Instruction objects. /// /// Every llvm-mca Instruction is described by an object of class InstrDesc. @@ -48,6 +70,10 @@ bool FirstCallInst; bool FirstReturnInst; + using InstRecycleCallback = + llvm::function_ref; + InstRecycleCallback InstRecycleCB; + Expected createInstrDescImpl(const MCInst &MCI); Expected getOrCreateInstrDesc(const MCInst &MCI); @@ -69,6 +95,10 @@ FirstReturnInst = true; } + /// Set a callback which is invoked to retrieve a recycled mca::Instruction + /// or null if there isn't any. + void setInstRecycleCallback(InstRecycleCallback CB) { InstRecycleCB = CB; } + Expected> createInstruction(const MCInst &MCI); }; } // namespace mca Index: llvm/include/llvm/MCA/Instruction.h =================================================================== --- llvm/include/llvm/MCA/Instruction.h +++ llvm/include/llvm/MCA/Instruction.h @@ -476,6 +476,11 @@ // buffer which is a dispatch hazard (BufferSize = 0). unsigned MustIssueImmediately : 1; + // True if the corresponding mca::Instruction can be recycled. Currently only + // instructions that are neither variadic nor have any variant can be + // recycled. + unsigned IsRecyclable : 1; + // A zero latency instruction doesn't consume any scheduler resources. bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } @@ -569,6 +574,7 @@ // Returns true if this instruction is a candidate for move elimination. bool isOptimizableMove() const { return IsOptimizableMove; } void setOptimizableMove() { IsOptimizableMove = true; } + void clearOptimizableMove() { IsOptimizableMove = false; } bool isMemOp() const { return MayLoad || MayStore; } // Getters and setters for general instruction flags. @@ -644,6 +650,8 @@ UsedBuffers(D.UsedBuffers), CriticalRegDep(), CriticalMemDep(), CriticalResourceMask(0), IsEliminated(false) {} + void reset(); + unsigned getRCUTokenID() const { return RCUTokenID; } unsigned getLSUTokenID() const { return LSUTokenID; } void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; } @@ -673,6 +681,7 @@ bool updateDispatched(); bool updatePending(); + bool isInvalid() const { return Stage == IS_INVALID; } bool isDispatched() const { return Stage == IS_DISPATCHED; } bool isPending() const { return Stage == IS_PENDING; } bool isReady() const { return Stage == IS_READY; } Index: llvm/lib/MCA/CMakeLists.txt =================================================================== --- llvm/lib/MCA/CMakeLists.txt +++ llvm/lib/MCA/CMakeLists.txt @@ -9,6 +9,7 @@ HardwareUnits/ResourceManager.cpp HardwareUnits/RetireControlUnit.cpp HardwareUnits/Scheduler.cpp + IncrementalSourceMgr.cpp InstrBuilder.cpp Instruction.cpp Pipeline.cpp Index: llvm/lib/MCA/IncrementalSourceMgr.cpp =================================================================== --- /dev/null +++ llvm/lib/MCA/IncrementalSourceMgr.cpp @@ -0,0 +1,51 @@ +//===-------------------- IncrementalSourceMgr.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines some implementations for IncrementalSourceMgr. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/IncrementalSourceMgr.h" +#ifndef NDEBUG +#include "llvm/Support/Format.h" +#endif + +using namespace llvm; +using namespace llvm::mca; + +void IncrementalSourceMgr::clear() { + Staging.clear(); + InstStorage.clear(); + TotalCounter = 0U; + EOS = false; +} + +void IncrementalSourceMgr::updateNext() { + ++TotalCounter; + Instruction *I = Staging.front(); + Staging.pop_front(); + I->reset(); + + if (InstFreedCB) + InstFreedCB(I); +} + +#ifndef NDEBUG +void IncrementalSourceMgr::printStatistic(raw_ostream &OS) { + unsigned MaxInstStorageSize = InstStorage.size(); + if (MaxInstStorageSize <= TotalCounter) { + auto Ratio = double(MaxInstStorageSize) / double(TotalCounter); + OS << "Cache ratio = " << MaxInstStorageSize << " / " << TotalCounter + << llvm::format(" (%.2f%%)", (1.0 - Ratio) * 100.0) << "\n"; + } else { + OS << "Error: Number of created instructions " + << "are larger than the number of issued instructions\n"; + } +} +#endif Index: llvm/lib/MCA/InstrBuilder.cpp =================================================================== --- llvm/lib/MCA/InstrBuilder.cpp +++ llvm/lib/MCA/InstrBuilder.cpp @@ -14,16 +14,19 @@ #include "llvm/MCA/InstrBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" -#define DEBUG_TYPE "llvm-mca" +#define DEBUG_TYPE "llvm-mca-instrbuilder" namespace llvm { namespace mca { +char RecycledInstErr::ID = 0; + InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, const llvm::MCRegisterInfo &mri, @@ -612,7 +615,7 @@ // Now add the new descriptor. bool IsVariadic = MCDesc.isVariadic(); - if (!IsVariadic && !IsVariant) { + if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) { Descriptors[MCI.getOpcode()] = std::move(ID); return *Descriptors[MCI.getOpcode()]; } @@ -632,14 +635,32 @@ return createInstrDescImpl(MCI); } +STATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc"); + Expected> InstrBuilder::createInstruction(const MCInst &MCI) { Expected DescOrErr = getOrCreateInstrDesc(MCI); if (!DescOrErr) return DescOrErr.takeError(); const InstrDesc &D = *DescOrErr; - std::unique_ptr NewIS = - std::make_unique(D, MCI.getOpcode()); + Instruction *NewIS = nullptr; + std::unique_ptr CreatedIS; + bool IsInstRecycled = false; + + if (!D.IsRecyclable) + ++NumVariantInst; + + if (D.IsRecyclable && InstRecycleCB) { + if (auto *I = InstRecycleCB(D)) { + NewIS = I; + NewIS->reset(); + IsInstRecycled = true; + } + } + if (!IsInstRecycled) { + CreatedIS = std::make_unique(D, MCI.getOpcode()); + NewIS = CreatedIS.get(); + } const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); const MCSchedClassDesc &SCDesc = @@ -668,6 +689,7 @@ // Initialize Reads first. MCPhysReg RegID = 0; + size_t Idx = 0U; for (const ReadDescriptor &RD : D.Reads) { if (!RD.isImplicitRead()) { // explicit read. @@ -686,15 +708,22 @@ continue; // Okay, this is a register operand. Create a ReadState for it. - NewIS->getUses().emplace_back(RD, RegID); - ReadState &RS = NewIS->getUses().back(); + ReadState *RS = nullptr; + if (IsInstRecycled && Idx < NewIS->getUses().size()) { + NewIS->getUses()[Idx] = ReadState(RD, RegID); + RS = &NewIS->getUses()[Idx++]; + } else { + NewIS->getUses().emplace_back(RD, RegID); + RS = &NewIS->getUses().back(); + ++Idx; + } if (IsDepBreaking) { // A mask of all zeroes means: explicit input operands are not // independent. if (Mask.isZero()) { if (!RD.isImplicitRead()) - RS.setIndependentFromDef(); + RS->setIndependentFromDef(); } else { // Check if this register operand is independent according to `Mask`. // Note that Mask may not have enough bits to describe all explicit and @@ -704,15 +733,21 @@ if (Mask.getBitWidth() > RD.UseIndex) { // Okay. This map describe register use `RD.UseIndex`. if (Mask[RD.UseIndex]) - RS.setIndependentFromDef(); + RS->setIndependentFromDef(); } } } } + if (IsInstRecycled && Idx < NewIS->getUses().size()) + NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx); // Early exit if there are no writes. - if (D.Writes.empty()) - return std::move(NewIS); + if (D.Writes.empty()) { + if (IsInstRecycled) + return llvm::make_error(NewIS); + else + return std::move(CreatedIS); + } // Track register writes that implicitly clear the upper portion of the // underlying super-registers using an APInt. @@ -725,6 +760,7 @@ // Initialize writes. unsigned WriteIndex = 0; + Idx = 0U; for (const WriteDescriptor &WD : D.Writes) { RegID = WD.isImplicitWrite() ? WD.RegisterID : MCI.getOperand(WD.OpIndex).getReg(); @@ -735,13 +771,26 @@ } assert(RegID && "Expected a valid register ID!"); - NewIS->getDefs().emplace_back(WD, RegID, - /* ClearsSuperRegs */ WriteMask[WriteIndex], - /* WritesZero */ IsZeroIdiom); + if (IsInstRecycled && Idx < NewIS->getDefs().size()) { + NewIS->getDefs()[Idx++] = + WriteState(WD, RegID, + /* ClearsSuperRegs */ WriteMask[WriteIndex], + /* WritesZero */ IsZeroIdiom); + } else { + NewIS->getDefs().emplace_back(WD, RegID, + /* ClearsSuperRegs */ WriteMask[WriteIndex], + /* WritesZero */ IsZeroIdiom); + ++Idx; + } ++WriteIndex; } + if (IsInstRecycled && Idx < NewIS->getDefs().size()) + NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx); - return std::move(NewIS); + if (IsInstRecycled) + return llvm::make_error(NewIS); + else + return std::move(CreatedIS); } } // namespace mca } // namespace llvm Index: llvm/lib/MCA/Instruction.cpp =================================================================== --- llvm/lib/MCA/Instruction.cpp +++ llvm/lib/MCA/Instruction.cpp @@ -148,6 +148,18 @@ return CriticalRegDep; } +void Instruction::reset() { + // Note that this won't clear read/write descriptors + // or other non-trivial fields + Stage = IS_INVALID; + CyclesLeft = UNKNOWN_CYCLES; + clearOptimizableMove(); + RCUTokenID = 0; + LSUTokenID = 0; + CriticalResourceMask = 0; + IsEliminated = false; +} + void Instruction::dispatch(unsigned RCUToken) { assert(Stage == IS_INVALID); Stage = IS_DISPATCHED; Index: llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp =================================================================== --- llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp +++ llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp @@ -78,3 +78,104 @@ ASSERT_EQ(*BV, *V) << "Value of '" << F << "' does not match"; } } + +TEST_F(X86TestBase, TestInstructionRecycling) { + mca::Context MCA(*MRI, *STI); + + std::unordered_map> + RecycledInsts; + auto GetRecycledInst = [&](const mca::InstrDesc &Desc) -> mca::Instruction * { + auto It = RecycledInsts.find(&Desc); + if (It != RecycledInsts.end()) { + auto &Insts = It->second; + if (Insts.size()) { + mca::Instruction *I = *Insts.begin(); + Insts.erase(I); + return I; + } + } + return nullptr; + }; + auto AddRecycledInst = [&](mca::Instruction *I) { + const mca::InstrDesc &D = I->getDesc(); + RecycledInsts[&D].insert(I); + }; + + mca::IncrementalSourceMgr ISM; + ISM.setOnInstFreedCallback(AddRecycledInst); + + // Empty CustomBehaviour. + auto CB = std::make_unique(*STI, ISM, *MCII); + + auto PO = getDefaultPipelineOptions(); + auto P = MCA.createDefaultPipeline(PO, ISM, *CB); + ASSERT_TRUE(P); + + SmallVector MCIs; + getSimpleInsts(MCIs, /*Repeats=*/100); + + // Add views. + auto SV = std::make_unique(STI->getSchedModel(), MCIs, + PO.DispatchWidth); + P->addEventListener(SV.get()); + + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get()); + IB.setInstRecycleCallback(GetRecycledInst); + + // Tile size = 7 + for (unsigned i = 0U, E = MCIs.size(); i < E;) { + for (unsigned TE = i + 7; i < TE && i < E; ++i) { + Expected> InstOrErr = + IB.createInstruction(MCIs[i]); + + if (!InstOrErr) { + mca::Instruction *RecycledInst = nullptr; + // Check if the returned instruction is a recycled + // one. + auto RemainingE = handleErrors(InstOrErr.takeError(), + [&](const mca::RecycledInstErr &RC) { + RecycledInst = RC.getInst(); + }); + ASSERT_FALSE(bool(RemainingE)); + ASSERT_TRUE(RecycledInst); + ISM.addRecycledInst(RecycledInst); + } else { + ISM.addInst(std::move(InstOrErr.get())); + } + } + + // Run the pipeline. + Expected Cycles = P->run(); + if (!Cycles) { + // Should be a stream pause error. + ASSERT_TRUE(Cycles.errorIsA()); + llvm::consumeError(Cycles.takeError()); + } + } + + ISM.endOfStream(); + // Has to terminate properly. + Expected Cycles = P->run(); + ASSERT_TRUE(bool(Cycles)); + + json::Value Result = SV->toJSON(); + auto *ResultObj = Result.getAsObject(); + ASSERT_TRUE(ResultObj); + + // Run the baseline. + json::Object BaselineResult; + auto E = runBaselineMCA(BaselineResult, MCIs); + ASSERT_FALSE(bool(E)) << "Failed to run baseline"; + auto *BaselineObj = BaselineResult.getObject(SV->getNameAsString()); + ASSERT_TRUE(BaselineObj) << "Does not contain SummaryView result"; + + // Compare the results. + constexpr const char *Fields[] = {"Instructions", "TotalCycles", "TotaluOps", + "BlockRThroughput"}; + for (const auto *F : Fields) { + auto V = ResultObj->getInteger(F); + auto BV = BaselineObj->getInteger(F); + ASSERT_TRUE(V && BV); + ASSERT_EQ(*BV, *V) << "Value of '" << F << "' does not match"; + } +}