Index: tools/llvm-mca/Backend.cpp =================================================================== --- tools/llvm-mca/Backend.cpp +++ tools/llvm-mca/Backend.cpp @@ -36,7 +36,7 @@ std::unique_ptr NewIS( IB->createInstruction(STI, IR.first, *IR.second)); const InstrDesc &Desc = NewIS->getDesc(); - if (!DU->isAvailable(Desc.NumMicroOps) || !DU->canDispatch(Desc)) + if (!DU->isAvailable(Desc.NumMicroOps) || !DU->canDispatch(*NewIS)) break; Instruction *IS = NewIS.get(); Index: tools/llvm-mca/Dispatch.h =================================================================== --- tools/llvm-mca/Dispatch.h +++ tools/llvm-mca/Dispatch.h @@ -28,56 +28,93 @@ class Scheduler; class Backend; -/// \brief Keeps track of register definitions. -/// -/// This class tracks register definitions, and performs register renaming -/// to break anti dependencies. -/// By default, there is no limit in the number of register aliases which -/// can be created for the purpose of register renaming. However, users can -/// specify at object construction time a limit in the number of temporary -/// registers which can be used by the register renaming logic. +/// \brief Manages hardware register files, and tracks data dependencies +/// between registers. class RegisterFile { const llvm::MCRegisterInfo &MRI; - // Currently used mappings and maximum used mappings. - // These are to generate statistics only. - unsigned NumUsedMappings; - unsigned MaxUsedMappings; - // Total number of mappings created over time. - unsigned TotalMappingsCreated; - - // The maximum number of register aliases which can be used by the - // register renamer. Defaut value for this field is zero. - // A value of zero for this field means that there is no limit in the - // amount of register mappings which can be created. That is equivalent - // to having a theoretically infinite number of temporary registers. - unsigned TotalMappings; - - // This map contains an entry for every physical register. 
- // A register index is used as a key value to access a WriteState. - // This is how we track RAW dependencies for dispatched - // instructions. For every register, we track the last seen write only. - // This assumes that all writes fully update both super and sub registers. - // We need a flag in MCInstrDesc to check if a write also updates super - // registers. We can then have a extra tablegen flag to set for instructions. - // This is a separate patch on its own. - std::vector RegisterMappings; - // Assumptions are: - // a) a false dependencies is always removed by the register renamer. - // b) the register renamer can create an "infinite" number of mappings. - // Since we track the number of mappings created, in future we may - // introduce constraints on the number of mappings that can be created. - // For example, the maximum number of registers that are available for - // register renaming purposes may default to the size of the register file. - - // In future, we can extend this design to allow multiple register files, and - // apply different restrictions on the register mappings and the number of - // temporary registers used by mappings. + + // Each register file is described by an instance of RegisterFileInfo. + // RegisterFileInfo tracks the number of temporary registers dynamically + // allocated during the execution. + struct RegisterFileInfo { + // Total number of temporary registers that are available for register + // renaming. A value of zero for this field means: this register file has + // an unbounded number of temporaries. + unsigned TotalMappings; + // Number of temporary registers that are currently in use. + unsigned NumUsedMappings; + // Maximum number of temporary registers used. + unsigned MaxUsedMappings; + // Total number of temporaries dynamically allocated. 
+ unsigned TotalMappingsCreated; + + RegisterFileInfo(unsigned NumTemporaries) + : TotalMappings(NumTemporaries), NumUsedMappings(0), MaxUsedMappings(0), + TotalMappingsCreated(0) {} + }; + + // This is where information related to the various register files is kept. + // This set always contains at least one register file at index #0. That + // register file "sees" all the physical registers declared by the target, and + // (by default) it has an unbounded number of temporaries. + // Users can override the number of temporary registers available in register + // file #0 through the command line flag `-register-file-size`. + llvm::SmallVector RegisterFiles; + + // RegisterMapping objects are used to map physical registers to WriteState + // objects. WriteState describes a register definition, and it is used to + // track RAW dependencies (see Instruction.h). + // A RegisterMapping object also specifies the set of register files + // associated to a physical register. The mapping between physreg and register + // files is done using a "register file mask". + // + // A register file mask identifies a set of register files. Each bit of the + // mask representation identifies a specific register file. + // For example: + // 0b0001 --> Register file #0 + // 0b0010 --> Register file #1 + // 0b0100 --> Register file #2 + // + // Note that this implementation allows register files to overlap. Also, + // by design, register file #0 contains all the physical registers. + using RegisterMapping = std::pair; + + // This map contains an entry for every physical register defined by the + // processor scheduling model. The goal is to track the temporary registers + // usage for every register file. + std::vector RegisterMappings; + + // Initializes a RegisterFileInfo object for every register file + // declared by the target. + // + // This method is called by the constructor of RegisterFile for every register + // file defined by the scheduling model. 
+ // The idea is that every scheduling model should be able to describe the + // composition of a register file, and how many temporaries are available. + // For example, a tablegen definition for an x86 FP register file that + // features AVX would look like this: + // + // def FPRegisterFile : RegisterFile<[VR128RegClass, VR256RegClass], 60> + // + // Here FPRegisterFile contains all the registers within the two specified + // register classes. FPRegisterFile implements 60 temporary registers which + // can be used for register renaming purposes. + // + // The list of register classes is then converted by the tablegen backend into + // a list of register class indices. That list, along with the number of + // temporaries, is then used to create a new RegisterFileInfo object. + void addRegisterFile(llvm::ArrayRef RegisterClasses, + unsigned NumTemps); + void reserveTemporaries(unsigned RegisterFileMask); + void releaseTemporaries(unsigned RegisterFileMask); public: - RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0) - : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0), - TotalMappingsCreated(0), TotalMappings(Mappings), - RegisterMappings(MRI.getNumRegs(), nullptr) {} + RegisterFile(const llvm::MCRegisterInfo &mri, unsigned TempRegs = 0) + : MRI(mri), RegisterMappings(MRI.getNumRegs(), { nullptr, 0U }) { + // Add the "default" register file to the set. + addRegisterFile({}, TempRegs); + // TODO: teach the scheduling models how to specify multiple registers. + } // Creates a new register mapping for RegID. // This reserves a temporary register in the register file. @@ -87,14 +124,25 @@ // This releases temporary registers in the register file. void invalidateRegisterMapping(const WriteState &WS); - bool isAvailable(unsigned NumRegWrites); + // Checks if there are enough temporary registers in the register files. + // Returns a "response mask" where each bit is the response from a + // RegisterFileInfo. 
+ // For example: if all register files are available, then the response mask + // is a bitmask of all zeroes. If instead register file #1 is not available, + // then the response mask is 0b10. + unsigned isAvailable(const llvm::ArrayRef Regs); void collectWrites(llvm::SmallVectorImpl &Writes, unsigned RegID) const; void updateOnRead(ReadState &RS, unsigned RegID); - unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; } - unsigned getTotalRegisterMappingsCreated() const { - return TotalMappingsCreated; + unsigned getMaxUsedRegisterMappings(unsigned RegisterFileIndex) const { + assert(RegisterFileIndex < getNumRegisterFiles()); + return RegisterFiles[RegisterFileIndex].MaxUsedMappings; + } + unsigned getTotalRegisterMappingsCreated(unsigned RegisterFileIndex) const { + assert(RegisterFileIndex < getNumRegisterFiles()); + return RegisterFiles[RegisterFileIndex].TotalMappingsCreated; } + unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } #ifndef NDEBUG void dump() const; @@ -233,7 +281,7 @@ // stored into a vector `DispatchStall` which is always of size DS_LAST. 
std::vector DispatchStalls; - bool checkRAT(const InstrDesc &Desc); + bool checkRAT(const Instruction &Inst); bool checkRCU(const InstrDesc &Desc); bool checkScheduler(const InstrDesc &Desc); @@ -260,9 +308,10 @@ bool isRCUEmpty() const { return RCU->isEmpty(); } - bool canDispatch(const InstrDesc &Desc) { + bool canDispatch(const Instruction &Inst) { + const InstrDesc &Desc = Inst.getDesc(); assert(isAvailable(Desc.NumMicroOps)); - return checkRCU(Desc) && checkRAT(Desc) && checkScheduler(Desc); + return checkRCU(Desc) && checkRAT(Inst) && checkScheduler(Desc); } unsigned dispatch(unsigned IID, Instruction *NewInst, @@ -290,11 +339,11 @@ unsigned getNumDispatchGroupStalls() const { return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]; } - unsigned getMaxUsedRegisterMappings() const { - return RAT->getMaxUsedRegisterMappings(); + unsigned getMaxUsedRegisterMappings(unsigned RegFileIndex = 0) const { + return RAT->getMaxUsedRegisterMappings(RegFileIndex); } - unsigned getTotalRegisterMappingsCreated() const { - return RAT->getTotalRegisterMappingsCreated(); + unsigned getTotalRegisterMappingsCreated(unsigned RegFileIndex = 0) const { + return RAT->getTotalRegisterMappingsCreated(RegFileIndex); } void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); } Index: tools/llvm-mca/Dispatch.cpp =================================================================== --- tools/llvm-mca/Dispatch.cpp +++ tools/llvm-mca/Dispatch.cpp @@ -25,23 +25,68 @@ namespace mca { +void RegisterFile::addRegisterFile(ArrayRef RegisterClasses, + unsigned NumTemps) { + unsigned RegisterFileIndex = RegisterFiles.size(); + RegisterFiles.emplace_back(NumTemps); + + // Special case where there are no register classes specified. + // An empty register class set means *all* registers. 
+ if (RegisterClasses.empty()) { + for (std::pair &Mapping : RegisterMappings) + Mapping.second |= 1U << RegisterFileIndex; + } else { + for (const unsigned RegClassIndex : RegisterClasses) { + const MCRegisterClass &RC = MRI.getRegClass(RegClassIndex); + for (auto I = RC.begin(), E = RC.end(); I < E; ++I) + RegisterMappings[*I].second |= 1U << RegisterFileIndex; + } + } +} + +void RegisterFile::reserveTemporaries(unsigned RegisterFileMask) { + assert(RegisterFileMask && "RegisterFileMask cannot be zero!"); + // Notify each register file that contains RegID. + do { + unsigned NextRegisterFile = llvm::PowerOf2Floor(RegisterFileMask); + unsigned RegisterFileIndex = llvm::countTrailingZeros(NextRegisterFile); + RegisterFileInfo &RFI = RegisterFiles[RegisterFileIndex]; + RFI.NumUsedMappings++; + RFI.MaxUsedMappings = std::max(RFI.MaxUsedMappings, RFI.NumUsedMappings); + RFI.TotalMappingsCreated++; + RegisterFileMask ^= NextRegisterFile; + } while (RegisterFileMask); +} + +void RegisterFile::releaseTemporaries(unsigned RegisterFileMask) { + assert(RegisterFileMask && "RegisterFileMask cannot be zero!"); + // Notify each register file that contains RegID. 
+ do { + unsigned NextRegisterFile = llvm::PowerOf2Floor(RegisterFileMask); + unsigned RegisterFileIndex = llvm::countTrailingZeros(NextRegisterFile); + RegisterFileInfo &RFI = RegisterFiles[RegisterFileIndex]; + assert(RFI.NumUsedMappings); + RFI.NumUsedMappings--; + RegisterFileMask ^= NextRegisterFile; + } while (RegisterFileMask); +} + void RegisterFile::addRegisterMapping(WriteState &WS) { unsigned RegID = WS.getRegisterID(); assert(RegID && "Adding an invalid register definition?"); - RegisterMappings[RegID] = &WS; + RegisterMapping &Mapping = RegisterMappings[RegID]; + Mapping.first = &WS; for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I] = &WS; - if (MaxUsedMappings == NumUsedMappings) - MaxUsedMappings++; - NumUsedMappings++; - TotalMappingsCreated++; + RegisterMappings[*I].first = &WS; + + reserveTemporaries(Mapping.second); // If this is a partial update, then we are done. if (!WS.fullyUpdatesSuperRegs()) return; for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I] = &WS; + RegisterMappings[*I].first = &WS; } void RegisterFile::invalidateRegisterMapping(const WriteState &WS) { @@ -52,25 +97,25 @@ assert(WS.getCyclesLeft() != -512 && "Invalidating a write of unknown cycles!"); assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); - if (!RegisterMappings[RegID]) + RegisterMapping &Mapping = RegisterMappings[RegID]; + if (!Mapping.first) return; - assert(NumUsedMappings); - NumUsedMappings--; + releaseTemporaries(Mapping.second); - if (RegisterMappings[RegID] == &WS) - RegisterMappings[RegID] = nullptr; + if (Mapping.first == &WS) + Mapping.first = nullptr; for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) - if (RegisterMappings[*I] == &WS) - RegisterMappings[*I] = nullptr; + if (RegisterMappings[*I].first == &WS) + RegisterMappings[*I].first = nullptr; if (!ShouldInvalidateSuperRegs) return; for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) - if (RegisterMappings[*I] 
== &WS) - RegisterMappings[*I] = nullptr; + if (RegisterMappings[*I].first == &WS) + RegisterMappings[*I].first = nullptr; } // Update the number of used mappings in the event of instruction retired. @@ -87,7 +132,7 @@ void RegisterFile::collectWrites(SmallVectorImpl &Writes, unsigned RegID) const { assert(RegID && RegID < RegisterMappings.size()); - WriteState *WS = RegisterMappings[RegID]; + WriteState *WS = RegisterMappings[RegID].first; if (WS) { DEBUG(dbgs() << "Found a dependent use of RegID=" << RegID << '\n'); Writes.push_back(WS); @@ -95,7 +140,7 @@ // Handle potential partial register updates. for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WS = RegisterMappings[*I]; + WS = RegisterMappings[*I].first; if (WS && std::find(Writes.begin(), Writes.end(), WS) == Writes.end()) { DEBUG(dbgs() << "Found a dependent use of subReg " << *I << " (part of " << RegID << ")\n"); @@ -104,32 +149,72 @@ } } -bool RegisterFile::isAvailable(unsigned NumRegWrites) { - if (!TotalMappings) - return true; - if (NumRegWrites > TotalMappings) { - // The user specified a too small number of registers. - // Artificially set the number of temporaries to NumRegWrites. - errs() << "warning: not enough temporaries in the register file. " - << "The register file size has been automatically increased to " - << NumRegWrites << '\n'; - TotalMappings = NumRegWrites; +unsigned RegisterFile::isAvailable(const ArrayRef Regs) { + SmallVector NumTemporaries(0U); + NumTemporaries.resize(getNumRegisterFiles()); + + // Find out how many temporary registers are required from each register file. 
+ for (const unsigned RegID : Regs) { + unsigned RegisterFileMask = RegisterMappings[RegID].second; + do { + unsigned NextRegisterFileID = llvm::PowerOf2Floor(RegisterFileMask); + NumTemporaries[llvm::countTrailingZeros(NextRegisterFileID)]++; + RegisterFileMask ^= NextRegisterFileID; + } while (RegisterFileMask); + } + + unsigned Response = 0; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + unsigned Temporaries = NumTemporaries[I]; + if (!Temporaries) + continue; + + RegisterFileInfo &RFI = RegisterFiles[I]; + if (!RFI.TotalMappings) { + // The register file has an unbounded number of registers. This is the + // default for register file #0, unless users override the number of + // temporaries via command line flag -register-file-size. + continue; + } + + if (RFI.TotalMappings < Temporaries) { + // The current register file is too small. This may occur if the number of + // temporaries for register file #0 was changed by the users via flag + // -register-file-size. Alternatively, the scheduling model specified a too + // small number of registers for this register file. Artificially increase + // the total number of mappings. + errs() << "warning: not enough temporaries in register file " << I + << ". 
The register file size has been increased to " << Temporaries + << '\n'; + RFI.TotalMappings = Temporaries; + } + + if (RFI.TotalMappings < RFI.NumUsedMappings + Temporaries) + Response |= (1U << I); } - return NumRegWrites + NumUsedMappings <= TotalMappings; + return Response; } #ifndef NDEBUG void RegisterFile::dump() const { - for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) - if (RegisterMappings[I]) { - dbgs() << MRI.getName(I) << ", " << I << ", "; - RegisterMappings[I]->dump(); - } + for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { + const RegisterMapping &RM = RegisterMappings[I]; + dbgs() << MRI.getName(I) << ", " << I << ", Map=" << RM.second << ", "; + if (RM.first) + RM.first->dump(); + else + dbgs() << "(null)\n"; + } - dbgs() << "TotalMappingsCreated: " << TotalMappingsCreated - << ", MaxUsedMappings: " << MaxUsedMappings - << ", NumUsedMappings: " << NumUsedMappings << '\n'; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + dbgs() << "Register File #" << I; + const RegisterFileInfo &RFI = RegisterFiles[I]; + dbgs() << "\n TotalMappings: " << RFI.TotalMappings + << "\n TotalMappingsCreated: " << RFI.TotalMappingsCreated + << "\n MaxUsedMappings: " << RFI.MaxUsedMappings + << "\n NumUsedMappings: " << RFI.NumUsedMappings << '\n'; + } } #endif @@ -199,12 +284,28 @@ } #endif -bool DispatchUnit::checkRAT(const InstrDesc &Desc) { - unsigned NumWrites = Desc.Writes.size(); - if (RAT->isAvailable(NumWrites)) - return true; - DispatchStalls[DS_RAT_REG_UNAVAILABLE]++; - return false; +bool DispatchUnit::checkRAT(const Instruction &Instr) { + // Collect register definitions from the WriteStates. + SmallVector RegDefs; + + for (const std::unique_ptr &Def : Instr.getDefs()) + RegDefs.push_back(Def->getRegisterID()); + + unsigned RegisterMask = RAT->isAvailable(RegDefs); + // A mask with all zeroes means: register files are available. 
+ if (RegisterMask) { + // TODO: We currently implement a single hardware counter for all the + // dispatch stalls caused by the unavailability of temporaries in one of the + // register files. + // In future, we want to let register files directly notify hardware + // listeners in the event of a dispatch stall. This would simplify the + // logic in Dispatch.[h/cpp], and move all the "hardware counting logic" + // into a View (for example: BackendStatistics). + DispatchStalls[DS_RAT_REG_UNAVAILABLE]++; + return false; + } + + return true; } bool DispatchUnit::checkRCU(const InstrDesc &Desc) {