diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -49,6 +49,33 @@
   /// scheduling model.
   virtual void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
                                       const MCInst &MCI) {}
+
+  /// This method can be overridden by targets to modify an instruction's
+  /// InstrDesc. This has to be called separately from postProcessInstruction
+  /// because by the time we call postProcessInstruction, the InstrDesc will
+  /// be const. Look at the definition of InstrDesc to get an idea of what kind
+  /// of changes you might need to make in this function vs
+  /// postProcessInstruction. Some examples are the MayLoad, MayStore, and
+  /// RetireOOO flags.
+  /// The return value of this function represents whether any modifications
+  /// were made or not. This is important because this allows InstrBuilder
+  /// to print some debug output making it clear that modifications were made.
+  /// If you wish to modify the operand or resource attributes of an InstrDesc
+  /// object, be sure to review how they're built originally within
+  /// InstrBuilder.cpp so that you can conform to the expected structure.
+  virtual bool modifyInstrDesc(InstrDesc &ID, const MCInst &MCI) {
+    return false;
+  }
+
+  /// Some targets may wish to maintain some state within their IPP.
+  /// IPP is created in llvm-mca.cpp before we start working on any individual
+  /// code region. Because of this, if IPP maintains state, it will have its
+  /// state carry over between code regions. This is likely not desirable as
+  /// each region should be thought of as completely independent of the other
+  /// regions. The resetState() method gets invoked within llvm-mca.cpp at the
+  /// beginning of each code region so targets can override this function to
+  /// clear any state that they have left from the previous code region.
+  virtual void resetState() {}
 };
 
 /// Class which can be overriden by targets to enforce instruction
diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h
--- a/llvm/include/llvm/MCA/InstrBuilder.h
+++ b/llvm/include/llvm/MCA/InstrBuilder.h
@@ -18,6 +18,7 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/CustomBehaviour.h"
 #include "llvm/MCA/Instruction.h"
 #include "llvm/MCA/Support.h"
 #include "llvm/Support/Error.h"
@@ -40,6 +41,7 @@
   const MCInstrInfo &MCII;
   const MCRegisterInfo &MRI;
   const MCInstrAnalysis *MCIA;
+  InstrPostProcess &IPP;
   SmallVector<uint64_t, 8> ProcResourceMasks;
 
   DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
@@ -60,7 +62,8 @@
 
 public:
   InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
-               const MCRegisterInfo &RI, const MCInstrAnalysis *IA);
+               const MCRegisterInfo &RI, const MCInstrAnalysis *IA,
+               InstrPostProcess &IPP);
 
   void clear() {
     Descriptors.clear();
diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -155,6 +155,8 @@
   // Optional definitions are allowed to reference regID zero (i.e. "no
   // register").
   bool IsOptionalDef;
+  // True only if this is a variadic write.
+  bool IsVariadicDef;
 
   bool isImplicitWrite() const { return OpIndex < 0; };
 };
@@ -173,6 +175,8 @@
   // Scheduling Class Index. It is used to query the scheduling model for the
   // MCSchedClassDesc object.
   unsigned SchedClassID;
+  // True only if this is a variadic read.
+  bool IsVariadicUse;
 
   bool isImplicitRead() const { return OpIndex < 0; };
 };
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -27,14 +27,117 @@
 InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                            const llvm::MCInstrInfo &mcii,
                            const llvm::MCRegisterInfo &mri,
-                           const llvm::MCInstrAnalysis *mcia)
-    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
+                           const llvm::MCInstrAnalysis *mcia,
+                           InstrPostProcess &ipp)
+    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IPP(ipp), FirstCallInst(true),
       FirstReturnInst(true) {
   const MCSchedModel &SM = STI.getSchedModel();
   ProcResourceMasks.resize(SM.getNumProcResourceKinds());
   computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
 }
 
+#ifndef NDEBUG
+// Debug-printing helpers. They are only referenced from LLVM_DEBUG(...), which
+// expands to nothing when NDEBUG is defined, so compile them out in that case.
+static void debugPrintResources(const InstrDesc &ID) {
+  LLVM_DEBUG({
+    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
+      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
+             << "Reserved=" << R.second.isReserved() << ", "
+             << "#Units=" << R.second.NumUnits << ", "
+             << "cy=" << R.second.size() << '\n';
+    uint64_t BufferIDs = ID.UsedBuffers;
+    while (BufferIDs) {
+      uint64_t Current = BufferIDs & (-BufferIDs);
+      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
+      BufferIDs ^= Current;
+    }
+    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
+    dbgs() << "\t\tImplicitly Used Units="
+           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
+    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
+           << '\n';
+  });
+}
+
+static void debugPrintExplicitWrite(const WriteDescriptor &Write) {
+  assert(!Write.isImplicitWrite() && !Write.IsVariadicDef &&
+         "This function should only be called for non-variadic explicit "
+         "writes."); // Explicit writes have a non-negative OpIndex
+
+  LLVM_DEBUG({
+    dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
+           << ", Latency=" << Write.Latency
+           << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+  });
+}
+
+static void debugPrintImplicitWrite(const WriteDescriptor &Write,
+                                    const MCRegisterInfo &MRI) {
+  assert(Write.isImplicitWrite() && !Write.IsVariadicDef &&
+         "This function should only be called for non-variadic implicit "
+         "writes."); // Negative integer for implicit writes
+
+  LLVM_DEBUG({
+    dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
+           << ", PhysReg=" << MRI.getName(Write.RegisterID)
+           << ", Latency=" << Write.Latency
+           << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+  });
+}
+
+static void debugPrintOptionalWrite(const WriteDescriptor &Write) {
+  assert(
+      Write.IsOptionalDef && !Write.IsVariadicDef &&
+      "This function should only be called for non-variadic optional writes.");
+
+  LLVM_DEBUG({
+    dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+           << ", Latency=" << Write.Latency
+           << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+  });
+}
+
+static void debugPrintVariadicWrite(const WriteDescriptor &Write) {
+  assert(Write.IsVariadicDef &&
+         "This function should only be called for variadic writes.");
+
+  LLVM_DEBUG({
+    dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+           << ", Latency=" << Write.Latency
+           << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+  });
+}
+
+static void debugPrintExplicitRead(const ReadDescriptor &Read) {
+  assert(
+      !Read.isImplicitRead() && !Read.IsVariadicUse &&
+      "This function should only be called for non-variadic explicit reads.");
+
+  LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
+                    << ", UseIndex=" << Read.UseIndex << '\n');
+}
+
+static void debugPrintImplicitRead(const ReadDescriptor &Read,
+                                   const MCRegisterInfo &MRI) {
+  assert(
+      Read.isImplicitRead() && !Read.IsVariadicUse &&
+      "This function should only be called for non-variadic implicit reads.");
+
+  LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+                    << ", UseIndex=" << Read.UseIndex
+                    << ", RegisterID=" << MRI.getName(Read.RegisterID) << '\n');
+}
+
+static void debugPrintVariadicRead(const ReadDescriptor &Read) {
+  assert(Read.IsVariadicUse &&
+         "This function should only be called for variadic reads.");
+
+  LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
+                    << ", UseIndex=" << Read.UseIndex << '\n');
+}
+#endif // NDEBUG
+
 static void initializeUsedResources(InstrDesc &ID,
                                     const MCSchedClassDesc &SCDesc,
                                     const MCSubtargetInfo &STI,
@@ -224,24 +327,7 @@
   ID.UsedProcResUnits = UsedResourceUnits;
   ID.UsedProcResGroups = UsedResourceGroups;
 
-  LLVM_DEBUG({
-    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
-      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
-             << "Reserved=" << R.second.isReserved() << ", "
-             << "#Units=" << R.second.NumUnits << ", "
-             << "cy=" << R.second.size() << '\n';
-    uint64_t BufferIDs = ID.UsedBuffers;
-    while (BufferIDs) {
-      uint64_t Current = BufferIDs & (-BufferIDs);
-      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
-      BufferIDs ^= Current;
-    }
-    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
-    dbgs() << "\t\tImplicitly Used Units="
-           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
-    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
-           << '\n';
-  });
+  LLVM_DEBUG(debugPrintResources(ID));
 }
 
 static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
@@ -377,11 +463,10 @@
       Write.SClassOrWriteResourceID = 0;
     }
     Write.IsOptionalDef = false;
-    LLVM_DEBUG({
-      dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
-             << ", Latency=" << Write.Latency
-             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
-    });
+    Write.IsVariadicDef = false;
+
+    LLVM_DEBUG(debugPrintExplicitWrite(Write));
+
     CurrentDef++;
   }
 
@@ -406,13 +491,9 @@
     }
 
     Write.IsOptionalDef = false;
+    Write.IsVariadicDef = false;
     assert(Write.RegisterID != 0 && "Expected a valid phys register!");
-    LLVM_DEBUG({
-      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
-             << ", PhysReg=" << MRI.getName(Write.RegisterID)
-             << ", Latency=" << Write.Latency
-             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
-    });
+    LLVM_DEBUG(debugPrintImplicitWrite(Write, MRI));
   }
 
   if (MCDesc.hasOptionalDef()) {
@@ -422,11 +503,9 @@
     Write.Latency = ID.MaxLatency;
     Write.SClassOrWriteResourceID = 0;
     Write.IsOptionalDef = true;
-    LLVM_DEBUG({
-      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
-             << ", Latency=" << Write.Latency
-             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
-    });
+    Write.IsVariadicDef = false;
+
+    LLVM_DEBUG(debugPrintOptionalWrite(Write));
   }
 
   if (!NumVariadicOps)
@@ -446,12 +525,10 @@
     Write.Latency = ID.MaxLatency;
     Write.SClassOrWriteResourceID = 0;
     Write.IsOptionalDef = false;
+    Write.IsVariadicDef = true;
     ++CurrentDef;
-    LLVM_DEBUG({
-      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
-             << ", Latency=" << Write.Latency
-             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
-    });
+
+    LLVM_DEBUG(debugPrintVariadicWrite(Write));
   }
 
   ID.Writes.resize(CurrentDef);
@@ -479,9 +556,10 @@
     Read.OpIndex = OpIndex;
     Read.UseIndex = I;
     Read.SchedClassID = SchedClassID;
+    Read.IsVariadicUse = false;
     ++CurrentUse;
-    LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
-                      << ", UseIndex=" << Read.UseIndex << '\n');
+
+    LLVM_DEBUG(debugPrintExplicitRead(Read));
   }
 
   // For the purpose of ReadAdvance, implicit uses come directly after explicit
@@ -492,9 +570,9 @@
     Read.UseIndex = NumExplicitUses + I;
     Read.RegisterID = MCDesc.getImplicitUses()[I];
     Read.SchedClassID = SchedClassID;
-    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
-                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
-                      << MRI.getName(Read.RegisterID) << '\n');
+    Read.IsVariadicUse = false;
+
+    LLVM_DEBUG(debugPrintImplicitRead(Read, MRI));
   }
 
   CurrentUse += NumImplicitUses;
@@ -510,9 +588,10 @@
     Read.OpIndex = OpIndex;
     Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
     Read.SchedClassID = SchedClassID;
+    Read.IsVariadicUse = true;
     ++CurrentUse;
-    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
-                      << ", UseIndex=" << Read.UseIndex << '\n');
+
+    LLVM_DEBUG(debugPrintVariadicRead(Read));
   }
 
   ID.Reads.resize(CurrentUse);
@@ -572,6 +651,7 @@
 
   LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
   LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');
 
   // Create a new empty descriptor.
   std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
@@ -616,6 +696,45 @@
   if (Error Err = verifyInstrDesc(*ID, MCI))
     return std::move(Err);
 
+  // Give IPP the chance to modify the InstrDesc. This needs to happen now
+  // because the function we are in now returns the InstrDesc as const.
+  bool Modified = IPP.modifyInstrDesc(*ID, MCI);
+  (void)Modified; // Only read inside LLVM_DEBUG below.
+  LLVM_DEBUG({
+    if (Modified) {
+      dbgs() << "\nPrevious instruction has been modified by target's "
+                "InstrPostProcess class.\nInstruction is now:";
+      dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n';
+      dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n';
+      dbgs() << "\t\tOpcode=" << Opcode << '\n';
+
+      debugPrintResources(*ID);
+
+      for (const WriteDescriptor &Write : ID->Writes) {
+        if (Write.IsOptionalDef)
+          debugPrintOptionalWrite(Write);
+        else if (Write.IsVariadicDef)
+          debugPrintVariadicWrite(Write);
+        else if (Write.isImplicitWrite())
+          debugPrintImplicitWrite(Write, MRI);
+        else
+          debugPrintExplicitWrite(Write);
+      }
+
+      for (const ReadDescriptor &Read : ID->Reads) {
+        if (Read.IsVariadicUse)
+          debugPrintVariadicRead(Read);
+        else if (Read.isImplicitRead())
+          debugPrintImplicitRead(Read, MRI);
+        else
+          debugPrintExplicitRead(Read);
+      }
+
+      dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n';
+      dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n';
+    }
+  });
+
   // Now add the new descriptor.
   bool IsVariadic = MCDesc.isVariadic();
   if (!IsVariadic && !IsVariant) {
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
--- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
@@ -39,6 +39,8 @@
 
   void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
                               const MCInst &MCI) override;
+
+  bool modifyInstrDesc(InstrDesc &ID, const MCInst &MCI) override;
 };
 
 } // namespace mca
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
--- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
@@ -43,6 +43,10 @@
   setMemBarriers(Inst, MCI);
 }
 
+bool X86InstrPostProcess::modifyInstrDesc(InstrDesc &ID, const MCInst &MCI) {
+  return false;
+}
+
 } // namespace mca
 } // namespace llvm
 
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -465,8 +465,23 @@
 
   const MCSchedModel &SM = STI->getSchedModel();
 
+  std::unique_ptr<mca::InstrPostProcess> IPP;
+  if (!DisableCustomBehaviour) {
+    // TODO: It may be a good idea to separate CB and IPP so that they can
+    // be used independently of each other. What I mean by this is to add
+    // an extra command-line arg --disable-ipp so that CB and IPP can be
+    // toggled without needing to toggle both of them together.
+    IPP = std::unique_ptr<mca::InstrPostProcess>(
+        TheTarget->createInstrPostProcess(*STI, *MCII));
+  }
+  if (!IPP) {
+    // If the target doesn't have its own IPP implemented (or the -disable-cb
+    // flag is set) then we use the base class (which does nothing).
+    IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
+  }
+
   // Create an instruction builder.
-  mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get());
+  mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IPP);
 
   // Create a context to control ownership of the pipeline hardware.
   mca::Context MCA(*MRI, *STI);
@@ -498,16 +513,10 @@
     ArrayRef<MCInst> Insts = Region->getInstructions();
     mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts);
 
-    std::unique_ptr<mca::InstrPostProcess> IPP;
-    if (!DisableCustomBehaviour) {
-      IPP = std::unique_ptr<mca::InstrPostProcess>(
-          TheTarget->createInstrPostProcess(*STI, *MCII));
-    }
-    if (!IPP)
-      // If the target doesn't have its own IPP implemented (or the
-      // -disable-cb flag is set) then we use the base class
-      // (which does nothing).
-      IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
+    // IPP may maintain state within a given code region, but since the IPP
+    // object persists between the different code regions, we should give it
+    // a chance to reset its state at the beginning of each region.
+    IPP->resetState();
 
     SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence;
     for (const MCInst &MCI : Insts) {