Index: tools/llvm-mca/Dispatch.h =================================================================== --- tools/llvm-mca/Dispatch.h +++ tools/llvm-mca/Dispatch.h @@ -28,73 +28,125 @@ class Scheduler; class Backend; -/// \brief Keeps track of register definitions. -/// -/// This class tracks register definitions, and performs register renaming -/// to break anti dependencies. -/// By default, there is no limit in the number of register aliases which -/// can be created for the purpose of register renaming. However, users can -/// specify at object construction time a limit in the number of temporary -/// registers which can be used by the register renaming logic. +/// \brief Manages hardware register files, and tracks data dependencies +/// between registers. class RegisterFile { const llvm::MCRegisterInfo &MRI; - // Currently used mappings and maximum used mappings. - // These are to generate statistics only. - unsigned NumUsedMappings; - unsigned MaxUsedMappings; - // Total number of mappings created over time. - unsigned TotalMappingsCreated; - - // The maximum number of register aliases which can be used by the - // register renamer. Defaut value for this field is zero. - // A value of zero for this field means that there is no limit in the - // amount of register mappings which can be created. That is equivalent - // to having a theoretically infinite number of temporary registers. - unsigned TotalMappings; - - // This map contains an entry for every physical register. - // A register index is used as a key value to access a WriteState. - // This is how we track RAW dependencies for dispatched - // instructions. For every register, we track the last seen write only. - // This assumes that all writes fully update both super and sub registers. - // We need a flag in MCInstrDesc to check if a write also updates super - // registers. We can then have a extra tablegen flag to set for instructions. - // This is a separate patch on its own. 
- std::vector RegisterMappings; - // Assumptions are: - // a) a false dependencies is always removed by the register renamer. - // b) the register renamer can create an "infinite" number of mappings. - // Since we track the number of mappings created, in future we may - // introduce constraints on the number of mappings that can be created. - // For example, the maximum number of registers that are available for - // register renaming purposes may default to the size of the register file. - - // In future, we can extend this design to allow multiple register files, and - // apply different restrictions on the register mappings and the number of - // temporary registers used by mappings. + + // Each register file is described by an instance of RegisterMappingTracker. + // RegisterMappingTracker tracks the number of register mappings dynamically + // allocated during the execution. + struct RegisterMappingTracker { + // Total number of register mappings that are available for register + // renaming. A value of zero for this field means: this register file has + // an unbound number of registers. + const unsigned TotalMappings; + // Number of mappings that are currently in use. + unsigned NumUsedMappings; + // Maximum number of register mappings used. + unsigned MaxUsedMappings; + // Total number of mappings allocated during the entire execution. + unsigned TotalMappingsCreated; + + RegisterMappingTracker(unsigned NumMappings) + : TotalMappings(NumMappings), NumUsedMappings(0), MaxUsedMappings(0), + TotalMappingsCreated(0) {} + }; + + // This is where information related to the various register files is kept. + // This set always contains at least one register file at index #0. That + // register file "sees" all the physical registers declared by the target, and + // (by default) it allows an unbound number of mappings. + // Users can limit the number of mappings that can be created by register file + // #0 through the command line flag `-register-file-size`. 
+ llvm::SmallVector RegisterFiles; + + // RegisterMapping objects are mainly used to track physical register + // definitions. A WriteState object describes a register definition, and it is + // used to track RAW dependencies (see Instruction.h). A RegisterMapping + // object also specifies the set of register files. The mapping between + // physreg and register files is done using a "register file mask". + // + // A register file mask identifies a set of register files. Each bit of the + // mask representation references a specific register file. + // For example: + // 0b0001 --> Register file #0 + // 0b0010 --> Register file #1 + // 0b0100 --> Register file #2 + // + // Note that this implementation allows register files to overlap. + // The maximum number of register files allowed by this implementation is 32. + using RegisterMapping = std::pair; + + // This map contains one entry for each physical register defined by the + // processor scheduling model. + std::vector RegisterMappings; + + // This method creates a new RegisterMappingTracker for a register file that + // contains all the physical registers specified by the register classes in + // the 'RegisterClasses' set. + // + // The long term goal is to let scheduling models optionally describe register + // files via tablegen definitions. This is still a work in progress. + // For example, here is how a tablegen definition for a x86 FP register file + // that features AVX might look like: + // + // def FPRegisterFile : RegisterFile<[VR128RegClass, VR256RegClass], 60> + // + // Here FPRegisterFile contains all the registers defined by register class + // VR128RegClass and VR256RegClass. FPRegisterFile implements 60 + // registers which can be used for register renaming purpose. + // + // The list of register classes is then converted by the tablegen backend into + // a list of register class indices. 
That list, along with the number of + // available mappings, is then used to create a new RegisterMappingTracker. + void addRegisterFile(llvm::ArrayRef RegisterClasses, + unsigned NumTemps); + + // Allocates a new register mapping in every register file specified by the + // register file mask. This method is called from addRegisterMapping. + void createNewMappings(unsigned RegisterFileMask); + + // Removes a previously allocated mapping from each register file in the + // RegisterFileMask set. This method is called from invalidateRegisterMapping. + void removeMappings(unsigned RegisterFileMask); public: - RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0) - : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0), - TotalMappingsCreated(0), TotalMappings(Mappings), - RegisterMappings(MRI.getNumRegs(), nullptr) {} + RegisterFile(const llvm::MCRegisterInfo &mri, unsigned TempRegs = 0) + : MRI(mri), RegisterMappings(MRI.getNumRegs(), {nullptr, 0U}) { + addRegisterFile({}, TempRegs); + // TODO: teach the scheduling models how to specify multiple register files. + } // Creates a new register mapping for RegID. - // This reserves a temporary register in the register file. + // This reserves a microarchitectural register in every register file that + // contains RegID. void addRegisterMapping(WriteState &WS); // Invalidates register mappings associated to the input WriteState object. - // This releases temporary registers in the register file. + // This releases previously allocated mappings for the physical register + // associated to the WriteState. void invalidateRegisterMapping(const WriteState &WS); - bool isAvailable(unsigned NumRegWrites); + // Checks if there are enough microarchitectural registers in the register + // files. Returns a "response mask" where each bit is the response from a + // RegisterMappingTracker. + // For example: if all register files are available, then the response mask + // is a bitmask of all zeroes. 
If instead register file #1 is not available, + // then the response mask is 0b10. + unsigned isAvailable(const llvm::ArrayRef Regs) const; void collectWrites(llvm::SmallVectorImpl &Writes, unsigned RegID) const; void updateOnRead(ReadState &RS, unsigned RegID); - unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; } - unsigned getTotalRegisterMappingsCreated() const { - return TotalMappingsCreated; + unsigned getMaxUsedRegisterMappings(unsigned RegisterFileIndex) const { + assert(RegisterFileIndex < getNumRegisterFiles()); + return RegisterFiles[RegisterFileIndex].MaxUsedMappings; + } + unsigned getTotalRegisterMappingsCreated(unsigned RegisterFileIndex) const { + assert(RegisterFileIndex < getNumRegisterFiles()); + return RegisterFiles[RegisterFileIndex].TotalMappingsCreated; } + unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } #ifndef NDEBUG void dump() const; @@ -291,11 +343,11 @@ unsigned getNumDispatchGroupStalls() const { return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]; } - unsigned getMaxUsedRegisterMappings() const { - return RAT->getMaxUsedRegisterMappings(); + unsigned getMaxUsedRegisterMappings(unsigned RegFileIndex = 0) const { + return RAT->getMaxUsedRegisterMappings(RegFileIndex); } - unsigned getTotalRegisterMappingsCreated() const { - return RAT->getTotalRegisterMappingsCreated(); + unsigned getTotalRegisterMappingsCreated(unsigned RegFileIndex = 0) const { + return RAT->getTotalRegisterMappingsCreated(RegFileIndex); } void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); } Index: tools/llvm-mca/Dispatch.cpp =================================================================== --- tools/llvm-mca/Dispatch.cpp +++ tools/llvm-mca/Dispatch.cpp @@ -25,23 +25,68 @@ namespace mca { +void RegisterFile::addRegisterFile(ArrayRef RegisterClasses, + unsigned NumTemps) { + unsigned RegisterFileIndex = RegisterFiles.size(); + RegisterFiles.emplace_back(NumTemps); + + // Special case where there are no 
register classes specified. + // An empty register class set means *all* registers. + if (RegisterClasses.empty()) { + for (std::pair &Mapping : RegisterMappings) + Mapping.second |= 1U << RegisterFileIndex; + } else { + for (const unsigned RegClassIndex : RegisterClasses) { + const MCRegisterClass &RC = MRI.getRegClass(RegClassIndex); + for (const MCPhysReg Reg : RC) + RegisterMappings[Reg].second |= 1U << RegisterFileIndex; + } + } +} + +void RegisterFile::createNewMappings(unsigned RegisterFileMask) { + assert(RegisterFileMask && "RegisterFileMask cannot be zero!"); + // Notify each register file that contains RegID. + do { + unsigned NextRegisterFile = llvm::PowerOf2Floor(RegisterFileMask); + unsigned RegisterFileIndex = llvm::countTrailingZeros(NextRegisterFile); + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedMappings++; + RMT.MaxUsedMappings = std::max(RMT.MaxUsedMappings, RMT.NumUsedMappings); + RMT.TotalMappingsCreated++; + RegisterFileMask ^= NextRegisterFile; + } while (RegisterFileMask); +} + +void RegisterFile::removeMappings(unsigned RegisterFileMask) { + assert(RegisterFileMask && "RegisterFileMask cannot be zero!"); + // Notify each register file that contains RegID. 
+ do { + unsigned NextRegisterFile = llvm::PowerOf2Floor(RegisterFileMask); + unsigned RegisterFileIndex = llvm::countTrailingZeros(NextRegisterFile); + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + assert(RMT.NumUsedMappings); + RMT.NumUsedMappings--; + RegisterFileMask ^= NextRegisterFile; + } while (RegisterFileMask); +} + void RegisterFile::addRegisterMapping(WriteState &WS) { unsigned RegID = WS.getRegisterID(); assert(RegID && "Adding an invalid register definition?"); - RegisterMappings[RegID] = &WS; + RegisterMapping &Mapping = RegisterMappings[RegID]; + Mapping.first = &WS; for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I] = &WS; - if (MaxUsedMappings == NumUsedMappings) - MaxUsedMappings++; - NumUsedMappings++; - TotalMappingsCreated++; + RegisterMappings[*I].first = &WS; + + createNewMappings(Mapping.second); // If this is a partial update, then we are done. if (!WS.fullyUpdatesSuperRegs()) return; for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I] = &WS; + RegisterMappings[*I].first = &WS; } void RegisterFile::invalidateRegisterMapping(const WriteState &WS) { @@ -52,25 +97,25 @@ assert(WS.getCyclesLeft() != -512 && "Invalidating a write of unknown cycles!"); assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); - if (!RegisterMappings[RegID]) + RegisterMapping &Mapping = RegisterMappings[RegID]; + if (!Mapping.first) return; - assert(NumUsedMappings); - NumUsedMappings--; + removeMappings(Mapping.second); - if (RegisterMappings[RegID] == &WS) - RegisterMappings[RegID] = nullptr; + if (Mapping.first == &WS) + Mapping.first = nullptr; for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) - if (RegisterMappings[*I] == &WS) - RegisterMappings[*I] = nullptr; + if (RegisterMappings[*I].first == &WS) + RegisterMappings[*I].first = nullptr; if (!ShouldInvalidateSuperRegs) return; for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) - if 
(RegisterMappings[*I] == &WS) - RegisterMappings[*I] = nullptr; + if (RegisterMappings[*I].first == &WS) + RegisterMappings[*I].first = nullptr; } // Update the number of used mappings in the event of instruction retired. @@ -87,7 +132,7 @@ void RegisterFile::collectWrites(SmallVectorImpl &Writes, unsigned RegID) const { assert(RegID && RegID < RegisterMappings.size()); - WriteState *WS = RegisterMappings[RegID]; + WriteState *WS = RegisterMappings[RegID].first; if (WS) { DEBUG(dbgs() << "Found a dependent use of RegID=" << RegID << '\n'); Writes.push_back(WS); @@ -95,7 +140,7 @@ // Handle potential partial register updates. for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WS = RegisterMappings[*I]; + WS = RegisterMappings[*I].first; if (WS && std::find(Writes.begin(), Writes.end(), WS) == Writes.end()) { DEBUG(dbgs() << "Found a dependent use of subReg " << *I << " (part of " << RegID << ")\n"); @@ -104,32 +149,67 @@ } } -bool RegisterFile::isAvailable(unsigned NumRegWrites) { - if (!TotalMappings) - return true; - if (NumRegWrites > TotalMappings) { - // The user specified a too small number of registers. - // Artificially set the number of temporaries to NumRegWrites. - errs() << "warning: not enough temporaries in the register file. " - << "The register file size has been automatically increased to " - << NumRegWrites << '\n'; - TotalMappings = NumRegWrites; +unsigned RegisterFile::isAvailable(const ArrayRef Regs) const { + SmallVector NumTemporaries(getNumRegisterFiles()); + + // Find how many new mappings must be created for each register file. 
+ for (const unsigned RegID : Regs) { + unsigned RegisterFileMask = RegisterMappings[RegID].second; + do { + unsigned NextRegisterFileID = llvm::PowerOf2Floor(RegisterFileMask); + NumTemporaries[llvm::countTrailingZeros(NextRegisterFileID)]++; + RegisterFileMask ^= NextRegisterFileID; + } while (RegisterFileMask); } - return NumRegWrites + NumUsedMappings <= TotalMappings; + unsigned Response = 0; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + unsigned Temporaries = NumTemporaries[I]; + if (!Temporaries) + continue; + + const RegisterMappingTracker &RMT = RegisterFiles[I]; + if (!RMT.TotalMappings) { + // The register file has an unbound number of microarchitectural + // registers. + continue; + } + + if (RMT.TotalMappings < Temporaries) { + // The current register file is too small. This may occur if the number of + // microarchitectural registers in register file #0 was changed by the + // users via flag -register-file-size. Alternatively, the scheduling model + // specified a too small number of registers for this register file. 
+ report_fatal_error( + "Not enough microarchitectural registers in the register file"); + } + + if (RMT.TotalMappings < RMT.NumUsedMappings + Temporaries) + Response |= (1U << I); + } + + return Response; } #ifndef NDEBUG void RegisterFile::dump() const { - for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) - if (RegisterMappings[I]) { - dbgs() << MRI.getName(I) << ", " << I << ", "; - RegisterMappings[I]->dump(); - } + for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { + const RegisterMapping &RM = RegisterMappings[I]; + dbgs() << MRI.getName(I) << ", " << I << ", Map=" << RM.second << ", "; + if (RM.first) + RM.first->dump(); + else + dbgs() << "(null)\n"; + } - dbgs() << "TotalMappingsCreated: " << TotalMappingsCreated - << ", MaxUsedMappings: " << MaxUsedMappings - << ", NumUsedMappings: " << NumUsedMappings << '\n'; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + dbgs() << "Register File #" << I; + const RegisterMappingTracker &RMT = RegisterFiles[I]; + dbgs() << "\n TotalMappings: " << RMT.TotalMappings + << "\n TotalMappingsCreated: " << RMT.TotalMappingsCreated + << "\n MaxUsedMappings: " << RMT.MaxUsedMappings + << "\n NumUsedMappings: " << RMT.NumUsedMappings << '\n'; + } } #endif @@ -200,12 +280,26 @@ #endif bool DispatchUnit::checkRAT(const Instruction &Instr) { - const InstrDesc &Desc = Instr.getDesc(); - unsigned NumWrites = Desc.Writes.size(); - if (RAT->isAvailable(NumWrites)) - return true; - DispatchStalls[DS_RAT_REG_UNAVAILABLE]++; - return false; + // Collect register definitions from the WriteStates. + SmallVector RegDefs; + + for (const std::unique_ptr &Def : Instr.getDefs()) + RegDefs.push_back(Def->getRegisterID()); + + unsigned RegisterMask = RAT->isAvailable(RegDefs); + // A mask with all zeroes means: register files are available. 
+ if (RegisterMask) { + // TODO: We currently implement a single hardware counter for all the + // dispatch stalls caused by the unavailability of registers in one of the + // register files. In future, we want to let register files directly notify + // hardware listeners in the event of a dispatch stall. This would simplify + // the logic in Dispatch.[h/cpp], and move all the "hardware counting logic" + // into a View (for example: BackendStatistics). + DispatchStalls[DS_RAT_REG_UNAVAILABLE]++; + return false; + } + + return true; } bool DispatchUnit::checkRCU(const InstrDesc &Desc) { @@ -283,7 +377,7 @@ for (std::unique_ptr &RS : NewInst->getUses()) updateRAWDependencies(*RS, STI); - // Allocate temporary registers in the register file. + // Allocate new mappings. for (std::unique_ptr &WS : NewInst->getDefs()) addNewRegisterMapping(*WS);