Index: include/llvm/MC/MCSchedule.h =================================================================== --- include/llvm/MC/MCSchedule.h +++ include/llvm/MC/MCSchedule.h @@ -172,6 +172,18 @@ unsigned NumRegisterFiles; const MCRegisterCostEntry *RegisterCostTable; unsigned NumRegisterCostEntries; + + struct PfmCountersInfo { + // An optional name of a performance counter that can be used to measure + // cycles. + const char *CycleCounter; + + // Indexed by MCProcResourceDesc index (entry 0 is invalid); each entry + // is null or names the counter(s) measuring that resource's utilization. + // Null when the processor defines no issue counters. + const char **IssueCounters; + }; + PfmCountersInfo PfmCounters; }; /// Machine model for scheduling, bundling, and heuristics. Index: include/llvm/Target/TargetSchedule.td =================================================================== --- include/llvm/Target/TargetSchedule.td +++ include/llvm/Target/TargetSchedule.td @@ -182,7 +182,8 @@ // // SchedModel ties these units to a processor for any stand-alone defs // of this class. -class ProcResourceUnits { +class ProcResourceUnits pfmCounters> { ProcResourceKind Kind = kind; int NumUnits = num; ProcResourceKind Super = ?; @@ -197,8 +198,8 @@ // Subtargets typically define processor resource kind and number of // units in one place. -class ProcResource : ProcResourceKind, - ProcResourceUnits; +class ProcResource pfmCounters = []> : ProcResourceKind, + ProcResourceUnits; class ProcResGroup resources> : ProcResourceKind { list Resources = resources; @@ -476,3 +477,23 @@ SchedMachineModel SchedModel = ?; } +// Allow the definition of hardware counters. +class PfmCounter { + SchedMachineModel SchedModel = ?; +} + +// Each processor can define how to measure cycles by defining a +// PfmCycleCounter. +class PfmCycleCounter : PfmCounter { + string Counter = counter; +} + +// Each ProcResourceUnits can define how to measure issued uops by defining +// a PfmIssueCounter. 
+class PfmIssueCounter counters> + : PfmCounter{ + // The resource units on which uops are issued. + ProcResourceUnits Resource = resource; + // The list of counters that measure issue events. + list Counters = counters; +} Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -1132,3 +1132,9 @@ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; let AllowRegisterRenaming = 1; } + +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "X86PfmCounters.td" Index: lib/Target/X86/X86PfmCounters.td =================================================================== --- /dev/null +++ lib/Target/X86/X86PfmCounters.td @@ -0,0 +1,58 @@ +let SchedModel = SandyBridgeModel in { +def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; +def SBPort0Counter : PfmIssueCounter; +def SBPort1Counter : PfmIssueCounter; +def SBPort23Counter : PfmIssueCounter; +def SBPort4Counter : PfmIssueCounter; +def SBPort5Counter : PfmIssueCounter; +} + +let SchedModel = HaswellModel in { +def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; +def HWPort0Counter : PfmIssueCounter; +def HWPort1Counter : PfmIssueCounter; +def HWPort2Counter : PfmIssueCounter; +def HWPort3Counter : PfmIssueCounter; +def HWPort4Counter : PfmIssueCounter; +def HWPort5Counter : PfmIssueCounter; +def HWPort6Counter : PfmIssueCounter; +def HWPort7Counter : PfmIssueCounter; +} + +let SchedModel = BroadwellModel in { +def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; +def BWPort0Counter : PfmIssueCounter; +def BWPort1Counter : PfmIssueCounter; +def BWPort2Counter : PfmIssueCounter; +def BWPort3Counter : PfmIssueCounter; +def BWPort4Counter : PfmIssueCounter; +def BWPort5Counter : PfmIssueCounter; +def BWPort6Counter : PfmIssueCounter; +def BWPort7Counter : 
PfmIssueCounter; +} + +let SchedModel = SkylakeClientModel in { +def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; +def SKLPort0Counter : PfmIssueCounter; +def SKLPort1Counter : PfmIssueCounter; +def SKLPort2Counter : PfmIssueCounter; +def SKLPort3Counter : PfmIssueCounter; +def SKLPort4Counter : PfmIssueCounter; +def SKLPort5Counter : PfmIssueCounter; +def SKLPort6Counter : PfmIssueCounter; +def SKLPort7Counter : PfmIssueCounter; +} + +let SchedModel = SkylakeServerModel in { +def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; +def SKXPort0Counter : PfmIssueCounter; +def SKXPort1Counter : PfmIssueCounter; +def SKXPort2Counter : PfmIssueCounter; +def SKXPort3Counter : PfmIssueCounter; +def SKXPort4Counter : PfmIssueCounter; +def SKXPort5Counter : PfmIssueCounter; +def SKXPort6Counter : PfmIssueCounter; +def SKXPort7Counter : PfmIssueCounter; +} Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -76,10 +76,15 @@ // measure several times and take the minimum value. constexpr const int NumMeasurements = 30; int64_t MinLatency = std::numeric_limits::max(); - // FIXME: Read the perf event from the MCSchedModel (see PR36984). 
- const pfm::PerfEvent CyclesPerfEvent("UNHALTED_CORE_CYCLES"); + const char *CounterName = State.getSubtargetInfo() + .getSchedModel() + .getExtraProcessorInfo() + .PfmCounters.CycleCounter; + if (!CounterName) + llvm::report_fatal_error("sched model does not define a cycle counter"); + const pfm::PerfEvent CyclesPerfEvent(CounterName); if (!CyclesPerfEvent.valid()) - llvm::report_fatal_error("invalid perf event 'UNHALTED_CORE_CYCLES'"); + llvm::report_fatal_error(std::string("invalid perf event '") + CounterName + "'"); for (size_t I = 0; I < NumMeasurements; ++I) { pfm::Counter Counter(CyclesPerfEvent); Counter.start(); Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -38,43 +38,6 @@ llvm::inconvertibleErrorCode()); } -// FIXME: Read the counter names from the ProcResourceUnits when PR36984 is -// fixed. -static const std::string *getEventNameFromProcResName(const char *ProcResName) { - static const std::unordered_map Entries = { - {"SBPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, - {"SBPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, - {"SBPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, - {"SBPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, - {"HWPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, - {"HWPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, - {"HWPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, - {"HWPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, - {"HWPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, - {"HWPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, - {"HWPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, - {"HWPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, - {"SKLPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, - {"SKLPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, - {"SKLPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, - {"SKLPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, - {"SKLPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, - {"SKLPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, - {"SKLPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, - {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, - {"SKXPort0", 
"UOPS_DISPATCHED_PORT:PORT_0"}, - {"SKXPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, - {"SKXPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, - {"SKXPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, - {"SKXPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, - {"SKXPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, - {"SKXPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, - {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, - }; - const auto It = Entries.find(ProcResName); - return It == Entries.end() ? nullptr : &It->second; -} - static std::vector generateIndependentAssignments( const LLVMState &State, const llvm::MCInstrDesc &InstrDesc, llvm::SmallVector Vars, int MaxAssignments) { @@ -228,19 +191,19 @@ std::vector Result; for (unsigned ProcResIdx = 1; ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) { - const llvm::MCProcResourceDesc &ProcRes = - *SchedModel.getProcResource(ProcResIdx); - const std::string *const EventName = - getEventNameFromProcResName(ProcRes.Name); - if (!EventName) + // IssueCounters is null when the model defines no issue counters at all. + const auto &Pfm = SchedModel.getExtraProcessorInfo().PfmCounters; + if (!Pfm.IssueCounters || !Pfm.IssueCounters[ProcResIdx]) continue; - pfm::Counter Counter{pfm::PerfEvent(*EventName)}; + // FIXME: Sum results when there are several counters for a single ProcRes + // (e.g. P23 on SandyBridge). + pfm::Counter Counter{pfm::PerfEvent(Pfm.IssueCounters[ProcResIdx])}; Counter.start(); Function(); Counter.stop(); Result.push_back({llvm::itostr(ProcResIdx), static_cast(Counter.read()) / NumRepetitions, - ProcRes.Name}); + SchedModel.getProcResource(ProcResIdx)->Name}); } return Result; } Index: utils/TableGen/CodeGenSchedule.h =================================================================== --- utils/TableGen/CodeGenSchedule.h +++ utils/TableGen/CodeGenSchedule.h @@ -238,6 +238,10 @@ // Optional Retire Control Unit definition. Record *RetireControlUnit; + // List of PfmCounters. 
+ RecVec PfmIssueCounterDefs; + Record *PfmCycleCounterDef = nullptr; + CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef, Record *IDef) : Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef), @@ -252,7 +256,9 @@ } bool hasExtraProcessorInfo() const { - return RetireControlUnit || !RegisterFiles.empty(); + return RetireControlUnit || !RegisterFiles.empty() || + !PfmIssueCounterDefs.empty() || + PfmCycleCounterDef != nullptr; } unsigned getProcResourceIdx(Record *PRDef) const; @@ -444,6 +450,8 @@ void collectRegisterFiles(); + void collectPfmCounters(); + void collectOptionalProcessorInfo(); std::string createSchedClassName(Record *ItinClassDef, Index: utils/TableGen/CodeGenSchedule.cpp =================================================================== --- utils/TableGen/CodeGenSchedule.cpp +++ utils/TableGen/CodeGenSchedule.cpp @@ -239,6 +239,11 @@ // Collect processor RetireControlUnit descriptors if available. collectRetireControlUnits(); + + // Find pfm counter definitions for each processor. + collectPfmCounters(); + + checkCompleteness(); } /// Gather all processor models. @@ -1537,6 +1542,23 @@ } } +// Attach each PfmIssueCounter/PfmCycleCounter def to its processor model. +void CodeGenSchedModels::collectPfmCounters() { + for (Record *Def : Records.getAllDerivedDefinitions("PfmIssueCounter")) { + CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); + PM.PfmIssueCounterDefs.emplace_back(Def); + } + for (Record *Def : Records.getAllDerivedDefinitions("PfmCycleCounter")) { + CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); + if (PM.PfmCycleCounterDef) { + PrintFatalError(Def->getLoc(), + "multiple cycle counters for " + + Def->getValueAsDef("SchedModel")->getName()); + } + PM.PfmCycleCounterDef = Def; + } +} + // Collect and sort WriteRes, ReadAdvance, and ProcResources. 
void CodeGenSchedModels::collectProcResources() { ProcResourceDefs = Records.getAllDerivedDefinitions("ProcResourceUnits"); Index: utils/TableGen/SubtargetEmitter.cpp =================================================================== --- utils/TableGen/SubtargetEmitter.cpp +++ utils/TableGen/SubtargetEmitter.cpp @@ -635,7 +635,7 @@ OS << ProcModel.ModelName << "RegisterCosts,\n "; else OS << "nullptr,\n "; - OS << NumCostEntries << " // Number of register cost entries.\n"; + OS << NumCostEntries << ", // Number of register cost entries.\n"; } unsigned @@ -686,6 +686,58 @@ return CostTblIndex; } +// Emits the issue counter table of ProcModel, one row per ProcResource index +// (row 0 is the invalid resource). Returns false if there is nothing to emit. +static bool EmitPfmIssueCountersTable(const CodeGenProcModel &ProcModel, + raw_ostream &OS) { + if (ProcModel.PfmIssueCounterDefs.empty()) + return false; + // getProcResourceIdx() is 1-based: slot 0 stays null for the invalid index. + const unsigned NumSlots = 1 + ProcModel.ProcResourceDefs.size(); + std::vector<const Record *> CounterDefs(NumSlots); + for (const Record *CounterDef : ProcModel.PfmIssueCounterDefs) { + const Record *&CD = CounterDefs[ProcModel.getProcResourceIdx( + CounterDef->getValueAsDef("Resource"))]; + if (CD) + PrintFatalError(CounterDef->getLoc(), + "multiple issue counters for " + + CounterDef->getValueAsDef("Resource")->getName()); + CD = CounterDef; + } + OS << "\nstatic const char* " << ProcModel.ModelName + << "PfmIssueCounters[] = {\n"; + for (const Record *CounterDef : CounterDefs) { + if (CounterDef) { + const auto PfmCounters = CounterDef->getValueAsListOfStrings("Counters"); + if (PfmCounters.empty()) + PrintFatalError(CounterDef->getLoc(), "empty counter list"); + // Adjacent literals concatenate into one comma-separated string. + for (unsigned I = 0, E = PfmCounters.size(); I != E; ++I) + OS << (I ? ",\" \"" : " \"") << PfmCounters[I]; + OS << "\", //" << CounterDef->getValueAsDef("Resource")->getName() << "\n"; + } else { + OS << " nullptr,\n"; + } + } + OS << "};\n"; + return true; +} + +static void EmitPfmCounters(const CodeGenProcModel &ProcModel, + const bool HasPfmIssueCounters, raw_ostream &OS) { + // Emit the cycle counter. 
+ if (ProcModel.PfmCycleCounterDef) + OS << " \"" << ProcModel.PfmCycleCounterDef->getValueAsString("Counter") + << "\", // Cycle counter.\n"; + else + OS << " nullptr, // No cycle counter.\n"; + + // Emit a reference to issue counters table. + if (HasPfmIssueCounters) + OS << " " << ProcModel.ModelName << "PfmIssueCounters\n"; + else + OS << " nullptr, // No issue counters.\n"; +} void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS) { @@ -693,6 +745,9 @@ // defined register file), and a table of register costs. unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS); + // Generate a table of ProcRes counter names. + const bool HasPfmIssueCounters = EmitPfmIssueCountersTable(ProcModel, OS); + // Now generate a table for the extra processor info. OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName << "ExtraInfo = {\n "; @@ -705,6 +760,8 @@ EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(), NumCostEntries, OS); + EmitPfmCounters(ProcModel, HasPfmIssueCounters, OS); + OS << "};\n"; } @@ -1308,9 +1365,9 @@ else OS << " nullptr, // No Itinerary\n"; if (PM.hasExtraProcessorInfo()) - OS << " &" << PM.ModelName << "ExtraInfo\n"; + OS << " &" << PM.ModelName << "ExtraInfo,\n"; else - OS << " nullptr // No extra processor descriptor\n"; + OS << " nullptr // No extra processor descriptor\n"; OS << "};\n"; } }