Index: include/llvm/MC/MCSchedule.h
===================================================================
--- include/llvm/MC/MCSchedule.h
+++ include/llvm/MC/MCSchedule.h
@@ -49,6 +49,10 @@
   // nullptr if the resource does not have sub-units.
   const unsigned *SubUnitsIdxBegin;
 
+  // An optional comma-separated list of performance counters that can be used
+  // to measure the unit utilization.
+  const char *PfmCounters;
+
   bool operator==(const MCProcResourceDesc &Other) const {
     return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx
       && BufferSize == Other.BufferSize;
@@ -245,6 +249,10 @@
 
   const MCExtraProcessorInfo *ExtraProcessorInfo;
 
+  // An optional name of a performance counter that can be used to measure
+  // cycles.
+  const char *PfmCycleCounter;
+
   bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; }
 
   unsigned getProcessorID() const { return ProcID; }
Index: include/llvm/Target/TargetSchedule.td
===================================================================
--- include/llvm/Target/TargetSchedule.td
+++ include/llvm/Target/TargetSchedule.td
@@ -120,6 +120,10 @@
   list<Predicate> UnsupportedFeatures = [];
 
   bit NoModel = 0; // Special tag to indicate missing machine model.
+
+  // The name of the libpfm counter that counts cycles for this scheduling
+  // model.
+  string PfmCycleCounter = "";
 }
 
 def NoSchedModel : SchedMachineModel {
@@ -182,12 +186,16 @@
 //
 // SchedModel ties these units to a processor for any stand-alone defs
 // of this class.
-class ProcResourceUnits<ProcResourceKind kind, int num> {
+class ProcResourceUnits<ProcResourceKind kind, int num,
+                        list<string> pfmCounters> {
   ProcResourceKind Kind = kind;
   int NumUnits = num;
   ProcResourceKind Super = ?;
   int BufferSize = -1;
   SchedMachineModel SchedModel = ?;
+
+  // The list of libpfm counter names that measure utilization of these units.
+  list<string> PfmCounters = pfmCounters;
 }
 
 // EponymousProcResourceKind helps implement ProcResourceUnits by
@@ -197,8 +205,8 @@
 
 // Subtargets typically define processor resource kind and number of
 // units in one place.
-class ProcResource<int num> : ProcResourceKind,
-  ProcResourceUnits<EponymousProcResourceKind, num>;
+class ProcResource<int num, list<string> pfmCounters = []> : ProcResourceKind,
+  ProcResourceUnits<EponymousProcResourceKind, num, pfmCounters>;
 
 class ProcResGroup<list<ProcResource> resources> : ProcResourceKind {
   list<ProcResource> Resources = resources;
Index: lib/Target/X86/X86SchedBroadwell.td
===================================================================
--- lib/Target/X86/X86SchedBroadwell.td
+++ lib/Target/X86/X86SchedBroadwell.td
@@ -37,14 +37,14 @@
 // ignore that.
 // Ports 2 and 3 are identical. They handle loads and the address half of
 // stores. Port 7 can handle address calculations.
-def BWPort0 : ProcResource<1>;
-def BWPort1 : ProcResource<1>;
-def BWPort2 : ProcResource<1>;
-def BWPort3 : ProcResource<1>;
-def BWPort4 : ProcResource<1>;
-def BWPort5 : ProcResource<1>;
-def BWPort6 : ProcResource<1>;
-def BWPort7 : ProcResource<1>;
+def BWPort0 : ProcResource<1, ["uops_dispatched_port:port_0"]>;
+def BWPort1 : ProcResource<1, ["uops_dispatched_port:port_1"]>;
+def BWPort2 : ProcResource<1, ["uops_dispatched_port:port_2"]>;
+def BWPort3 : ProcResource<1, ["uops_dispatched_port:port_3"]>;
+def BWPort4 : ProcResource<1, ["uops_dispatched_port:port_4"]>;
+def BWPort5 : ProcResource<1, ["uops_dispatched_port:port_5"]>;
+def BWPort6 : ProcResource<1, ["uops_dispatched_port:port_6"]>;
+def BWPort7 : ProcResource<1, ["uops_dispatched_port:port_7"]>;
 
 // Many micro-ops are capable of issuing on multiple ports.
 def BWPort01 : ProcResGroup<[BWPort0, BWPort1]>;
Index: lib/Target/X86/X86SchedHaswell.td
===================================================================
--- lib/Target/X86/X86SchedHaswell.td
+++ lib/Target/X86/X86SchedHaswell.td
@@ -26,6 +26,8 @@
   // This flag is set to allow the scheduler to assign a default model to
   // unrecognized opcodes.
   let CompleteModel = 0;
+
+  let PfmCycleCounter = "unhalted_core_cycles";
 }
 
 let SchedModel = HaswellModel in {
@@ -38,14 +40,14 @@
 // ignore that.
 // Ports 2 and 3 are identical. They handle loads and the address half of
 // stores. Port 7 can handle address calculations.
-def HWPort0 : ProcResource<1>;
-def HWPort1 : ProcResource<1>;
-def HWPort2 : ProcResource<1>;
-def HWPort3 : ProcResource<1>;
-def HWPort4 : ProcResource<1>;
-def HWPort5 : ProcResource<1>;
-def HWPort6 : ProcResource<1>;
-def HWPort7 : ProcResource<1>;
+def HWPort0 : ProcResource<1, ["uops_dispatched_port:port_0"]>;
+def HWPort1 : ProcResource<1, ["uops_dispatched_port:port_1"]>;
+def HWPort2 : ProcResource<1, ["uops_dispatched_port:port_2"]>;
+def HWPort3 : ProcResource<1, ["uops_dispatched_port:port_3"]>;
+def HWPort4 : ProcResource<1, ["uops_dispatched_port:port_4"]>;
+def HWPort5 : ProcResource<1, ["uops_dispatched_port:port_5"]>;
+def HWPort6 : ProcResource<1, ["uops_dispatched_port:port_6"]>;
+def HWPort7 : ProcResource<1, ["uops_dispatched_port:port_7"]>;
 
 // Many micro-ops are capable of issuing on multiple ports.
 def HWPort01 : ProcResGroup<[HWPort0, HWPort1]>;
Index: lib/Target/X86/X86SchedSandyBridge.td
===================================================================
--- lib/Target/X86/X86SchedSandyBridge.td
+++ lib/Target/X86/X86SchedSandyBridge.td
@@ -34,18 +34,19 @@
 // Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
 
 // Ports 0, 1, and 5 handle all computation.
-def SBPort0 : ProcResource<1>;
-def SBPort1 : ProcResource<1>;
-def SBPort5 : ProcResource<1>;
+def SBPort0 : ProcResource<1, ["uops_dispatched_port:port_0"]>;
+def SBPort1 : ProcResource<1, ["uops_dispatched_port:port_1"]>;
+def SBPort5 : ProcResource<1, ["uops_dispatched_port:port_5"]>;
 
 // Ports 2 and 3 are identical. They handle loads and the address half of
 // stores.
-def SBPort23 : ProcResource<2>;
+def SBPort23 : ProcResource<2, ["uops_dispatched_port:port_2",
+                                "uops_dispatched_port:port_3"]>;
 
 // Port 4 gets the data half of stores. Store data can be available later than
 // the store address, but since we don't model the latency of stores, we can
 // ignore that.
-def SBPort4 : ProcResource<1>;
+def SBPort4 : ProcResource<1, ["uops_dispatched_port:port_4"]>;
 
 // Many micro-ops are capable of issuing on multiple ports.
 def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>;
Index: lib/Target/X86/X86SchedSkylakeClient.td
===================================================================
--- lib/Target/X86/X86SchedSkylakeClient.td
+++ lib/Target/X86/X86SchedSkylakeClient.td
@@ -38,14 +38,14 @@
 // ignore that.
 // Ports 2 and 3 are identical. They handle loads and the address half of
 // stores. Port 7 can handle address calculations.
-def SKLPort0 : ProcResource<1>;
-def SKLPort1 : ProcResource<1>;
-def SKLPort2 : ProcResource<1>;
-def SKLPort3 : ProcResource<1>;
-def SKLPort4 : ProcResource<1>;
-def SKLPort5 : ProcResource<1>;
-def SKLPort6 : ProcResource<1>;
-def SKLPort7 : ProcResource<1>;
+def SKLPort0 : ProcResource<1, ["uops_dispatched_port:port_0"]>;
+def SKLPort1 : ProcResource<1, ["uops_dispatched_port:port_1"]>;
+def SKLPort2 : ProcResource<1, ["uops_dispatched_port:port_2"]>;
+def SKLPort3 : ProcResource<1, ["uops_dispatched_port:port_3"]>;
+def SKLPort4 : ProcResource<1, ["uops_dispatched_port:port_4"]>;
+def SKLPort5 : ProcResource<1, ["uops_dispatched_port:port_5"]>;
+def SKLPort6 : ProcResource<1, ["uops_dispatched_port:port_6"]>;
+def SKLPort7 : ProcResource<1, ["uops_dispatched_port:port_7"]>;
 
 // Many micro-ops are capable of issuing on multiple ports.
 def SKLPort01 : ProcResGroup<[SKLPort0, SKLPort1]>;
Index: lib/Target/X86/X86SchedSkylakeServer.td
===================================================================
--- lib/Target/X86/X86SchedSkylakeServer.td
+++ lib/Target/X86/X86SchedSkylakeServer.td
@@ -38,14 +38,14 @@
 // ignore that.
 // Ports 2 and 3 are identical. They handle loads and the address half of
 // stores. Port 7 can handle address calculations.
-def SKXPort0 : ProcResource<1>;
-def SKXPort1 : ProcResource<1>;
-def SKXPort2 : ProcResource<1>;
-def SKXPort3 : ProcResource<1>;
-def SKXPort4 : ProcResource<1>;
-def SKXPort5 : ProcResource<1>;
-def SKXPort6 : ProcResource<1>;
-def SKXPort7 : ProcResource<1>;
+def SKXPort0 : ProcResource<1, ["uops_dispatched_port:port_0"]>;
+def SKXPort1 : ProcResource<1, ["uops_dispatched_port:port_1"]>;
+def SKXPort2 : ProcResource<1, ["uops_dispatched_port:port_2"]>;
+def SKXPort3 : ProcResource<1, ["uops_dispatched_port:port_3"]>;
+def SKXPort4 : ProcResource<1, ["uops_dispatched_port:port_4"]>;
+def SKXPort5 : ProcResource<1, ["uops_dispatched_port:port_5"]>;
+def SKXPort6 : ProcResource<1, ["uops_dispatched_port:port_6"]>;
+def SKXPort7 : ProcResource<1, ["uops_dispatched_port:port_7"]>;
 
 // Many micro-ops are capable of issuing on multiple ports.
 def SKXPort01 : ProcResGroup<[SKXPort0, SKXPort1]>;
Index: tools/llvm-exegesis/lib/Latency.cpp
===================================================================
--- tools/llvm-exegesis/lib/Latency.cpp
+++ tools/llvm-exegesis/lib/Latency.cpp
@@ -76,10 +76,14 @@
   // measure several times and take the minimum value.
   constexpr const int NumMeasurements = 30;
   int64_t MinLatency = std::numeric_limits<int64_t>::max();
-  // FIXME: Read the perf event from the MCSchedModel (see PR36984).
-  const pfm::PerfEvent CyclesPerfEvent("UNHALTED_CORE_CYCLES");
+  const char *const CounterName =
+      State.getSubtargetInfo().getSchedModel().PfmCycleCounter;
+  if (!CounterName)
+    llvm::report_fatal_error("sched model does not define a cycle counter");
+  const pfm::PerfEvent CyclesPerfEvent(CounterName);
   if (!CyclesPerfEvent.valid())
-    llvm::report_fatal_error("invalid perf event 'UNHALTED_CORE_CYCLES'");
+    llvm::report_fatal_error(llvm::Twine("invalid perf event '") +
+                             CounterName + "'");
   for (size_t I = 0; I < NumMeasurements; ++I) {
     pfm::Counter Counter(CyclesPerfEvent);
     Counter.start();
Index: tools/llvm-exegesis/lib/Uops.cpp
===================================================================
--- tools/llvm-exegesis/lib/Uops.cpp
+++ tools/llvm-exegesis/lib/Uops.cpp
@@ -38,43 +38,6 @@
                                              llvm::inconvertibleErrorCode());
 }
 
-// FIXME: Read the counter names from the ProcResourceUnits when PR36984 is
-// fixed.
-static const std::string *getEventNameFromProcResName(const char *ProcResName) {
-  static const std::unordered_map<std::string, std::string> Entries = {
-      {"SBPort0", "UOPS_DISPATCHED_PORT:PORT_0"},
-      {"SBPort1", "UOPS_DISPATCHED_PORT:PORT_1"},
-      {"SBPort4", "UOPS_DISPATCHED_PORT:PORT_4"},
-      {"SBPort5", "UOPS_DISPATCHED_PORT:PORT_5"},
-      {"HWPort0", "UOPS_DISPATCHED_PORT:PORT_0"},
-      {"HWPort1", "UOPS_DISPATCHED_PORT:PORT_1"},
-      {"HWPort2", "UOPS_DISPATCHED_PORT:PORT_2"},
-      {"HWPort3", "UOPS_DISPATCHED_PORT:PORT_3"},
-      {"HWPort4", "UOPS_DISPATCHED_PORT:PORT_4"},
-      {"HWPort5", "UOPS_DISPATCHED_PORT:PORT_5"},
-      {"HWPort6", "UOPS_DISPATCHED_PORT:PORT_6"},
-      {"HWPort7", "UOPS_DISPATCHED_PORT:PORT_7"},
-      {"SKLPort0", "UOPS_DISPATCHED_PORT:PORT_0"},
-      {"SKLPort1", "UOPS_DISPATCHED_PORT:PORT_1"},
-      {"SKLPort2", "UOPS_DISPATCHED_PORT:PORT_2"},
-      {"SKLPort3", "UOPS_DISPATCHED_PORT:PORT_3"},
-      {"SKLPort4", "UOPS_DISPATCHED_PORT:PORT_4"},
-      {"SKLPort5", "UOPS_DISPATCHED_PORT:PORT_5"},
-      {"SKLPort6", "UOPS_DISPATCHED_PORT:PORT_6"},
-      {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"},
-      {"SKXPort0", "UOPS_DISPATCHED_PORT:PORT_0"},
-      {"SKXPort1", "UOPS_DISPATCHED_PORT:PORT_1"},
-      {"SKXPort2", "UOPS_DISPATCHED_PORT:PORT_2"},
-      {"SKXPort3", "UOPS_DISPATCHED_PORT:PORT_3"},
-      {"SKXPort4", "UOPS_DISPATCHED_PORT:PORT_4"},
-      {"SKXPort5", "UOPS_DISPATCHED_PORT:PORT_5"},
-      {"SKXPort6", "UOPS_DISPATCHED_PORT:PORT_6"},
-      {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"},
-  };
-  const auto It = Entries.find(ProcResName);
-  return It == Entries.end() ? nullptr : &It->second;
-}
-
 static std::vector<llvm::MCInst> generateIndependentAssignments(
     const LLVMState &State, const llvm::MCInstrDesc &InstrDesc,
     llvm::SmallVector<Variable, 8> Vars, int MaxAssignments) {
@@ -230,11 +193,11 @@
          ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) {
     const llvm::MCProcResourceDesc &ProcRes =
         *SchedModel.getProcResource(ProcResIdx);
-    const std::string *const EventName =
-        getEventNameFromProcResName(ProcRes.Name);
-    if (!EventName)
+    if (!ProcRes.PfmCounters)
       continue;
-    pfm::Counter Counter{pfm::PerfEvent(*EventName)};
+    // FIXME: Sum results when there are several counters for a single ProcRes
+    // (e.g. P23 on SandyBridge).
+    pfm::Counter Counter{pfm::PerfEvent(ProcRes.PfmCounters)};
     Counter.start();
     Function();
     Counter.stop();
Index: utils/TableGen/SubtargetEmitter.cpp
===================================================================
--- utils/TableGen/SubtargetEmitter.cpp
+++ utils/TableGen/SubtargetEmitter.cpp
@@ -699,7 +699,7 @@
   OS << "static const llvm::MCProcResourceDesc "
      << ProcModel.ModelName << "ProcResources"
      << "[] = {\n"
-     << " {\"InvalidUnit\", 0, 0, 0, 0},\n";
+     << " {\"InvalidUnit\", 0, 0, 0, 0, nullptr},\n";
 
   unsigned SubUnitsOffset = 1;
   for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {
@@ -738,6 +738,25 @@
     } else {
       OS << "nullptr";
     }
+    OS << ", ";
+    if (PRDef->isSubClassOf("ProcResourceUnits")) {
+      const auto PfmCounters = PRDef->getValueAsListOfStrings("PfmCounters");
+      if (PfmCounters.empty()) {
+        OS << "nullptr";
+      } else {
+        // Emit a single string literal holding the comma-separated names,
+        // without a trailing separator.
+        OS << "\"";
+        const char *Separator = "";
+        for (const StringRef CounterName : PfmCounters) {
+          OS << Separator << CounterName;
+          Separator = ",";
+        }
+        OS << "\"";
+      }
+    } else {
+      OS << "nullptr";
+    }
     OS << "}, // #" << i+1;
     if (SuperDef)
       OS << ", Super=" << SuperDef->getName();
@@ -1291,9 +1310,15 @@
     else
       OS << " nullptr, // No Itinerary\n";
     if (PM.hasExtraProcessorInfo())
-      OS << " &" << PM.ModelName << "ExtraInfo\n";
+      OS << " &" << PM.ModelName << "ExtraInfo,\n";
+    else
+      OS << " nullptr, // No extra processor descriptor\n";
+    const StringRef PfmCycleCounter =
+        PM.ModelDef->getValueAsString("PfmCycleCounter");
+    if (!PfmCycleCounter.empty())
+      OS << " \"" << PfmCycleCounter << "\"\n";
     else
-      OS << " nullptr // No extra processor descriptor\n";
+      OS << " nullptr // No pfm cycle counter\n";
     OS << "};\n";
   }
 }