Index: include/llvm/CodeGen/TargetSubtargetInfo.h =================================================================== --- include/llvm/CodeGen/TargetSubtargetInfo.h +++ include/llvm/CodeGen/TargetSubtargetInfo.h @@ -65,6 +65,7 @@ ArrayRef PF, ArrayRef PD, const SubtargetInfoKV *ProcSched, + const SubtargetInfoKV *ProcPfm, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, @@ -162,7 +163,7 @@ /// /// Similar in behavior to `isZeroIdiom`. However, it knows how to identify /// all dependency breaking instructions (i.e. not just zero-idioms). - /// + /// /// As for `isZeroIdiom`, this method returns a mask of "broken" dependencies. /// (See method `isZeroIdiom` for a detailed description of Mask). virtual bool isDependencyBreaking(const MachineInstr *MI, APInt &Mask) const { Index: include/llvm/MC/MCSchedule.h =================================================================== --- include/llvm/MC/MCSchedule.h +++ include/llvm/MC/MCSchedule.h @@ -176,22 +176,31 @@ unsigned NumRegisterFiles; const MCRegisterCostEntry *RegisterCostTable; unsigned NumRegisterCostEntries; +}; - struct PfmCountersInfo { - // An optional name of a performance counter that can be used to measure - // cycles. - const char *CycleCounter; - - // An optional name of a performance counter that can be used to measure - // uops. - const char *UopsCounter; - - // For each MCProcResourceDesc defined by the processor, an optional list of - // names of performance counters that can be used to measure the resource - // utilization. - const char **IssueCounters; +struct MCPfmCountersInfo { + // An optional name of a performance counter that can be used to measure + // cycles. + const char *const CycleCounter; + + // An optional name of a performance counter that can be used to measure + // uops. + const char *const UopsCounter; + + // An IssueCounter specifies how to measure uops issued to specific proc + // resources. 
+ struct IssueCounter { + const char *const Counter; + // The name of the ProcResource that this counter measures. + const char *const ProcResName; }; - PfmCountersInfo PfmCounters; + // An optional list of IssueCounters. + const IssueCounter *const IssueCounters; + const unsigned NumIssueCounters; + + /// Returns the default initialized pfm counters. + static const MCPfmCountersInfo &getDefault() { return Default; } + static const MCPfmCountersInfo Default; }; /// Machine model for scheduling, bundling, and heuristics. Index: include/llvm/MC/MCSubtargetInfo.h =================================================================== --- include/llvm/MC/MCSubtargetInfo.h +++ include/llvm/MC/MCSubtargetInfo.h @@ -41,10 +41,12 @@ // Scheduler machine model const SubtargetInfoKV *ProcSchedModels; + const SubtargetInfoKV *ProcPfmCounters; const MCWriteProcResEntry *WriteProcResTable; const MCWriteLatencyEntry *WriteLatencyTable; const MCReadAdvanceEntry *ReadAdvanceTable; const MCSchedModel *CPUSchedModel; + const MCPfmCountersInfo *CPUPfmCounters; const InstrStage *Stages; // Instruction itinerary stages const unsigned *OperandCycles; // Itinerary operand cycles @@ -57,6 +59,7 @@ ArrayRef PF, ArrayRef PD, const SubtargetInfoKV *ProcSched, + const SubtargetInfoKV *ProcPfm, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, const unsigned *FP); @@ -115,6 +118,12 @@ /// Get the machine model for this subtarget's CPU. const MCSchedModel &getSchedModel() const { return *CPUSchedModel; } + /// Get the pfm counters of a CPU. + const MCPfmCountersInfo &getPfmCountersForCPU(StringRef CPU) const; + + /// Get the pfm counters for this subtarget's CPU. + const MCPfmCountersInfo &getPfmCounters() const { return *CPUPfmCounters; } + /// Return an iterator at the first process resource consumed by the given /// scheduling class. 
const MCWriteProcResEntry *getWriteProcResBegin( Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -1453,7 +1453,8 @@ // by the scheduler. Each Processor definition requires corresponding // instruction itineraries. // -class Processor f> { +class Processor f, + ProcPfmCounters counters = NoProcPfmCounters> { // Name - Chip set name. Used by command line (-mcpu=) to determine the // appropriate target chip. // @@ -1469,6 +1470,8 @@ // Features - list of list Features = f; + + ProcPfmCounters PfmCounters = counters; } // ProcessorModel allows subtargets to specify the more general @@ -1477,8 +1480,9 @@ // // Although this class always passes NoItineraries to the Processor // class, the SchedMachineModel may still define valid Itineraries. -class ProcessorModel f> - : Processor { +class ProcessorModel f, + ProcPfmCounters counters = NoProcPfmCounters> + : Processor { let SchedModel = m; } Index: include/llvm/Target/TargetSchedule.td =================================================================== --- include/llvm/Target/TargetSchedule.td +++ include/llvm/Target/TargetSchedule.td @@ -531,29 +531,32 @@ } // Allow the definition of hardware counters. -class PfmCounter { - SchedMachineModel SchedModel = ?; -} - -// Each processor can define how to measure cycles by defining a -// PfmCycleCounter. -class PfmCycleCounter : PfmCounter { +class PfmCounter { + // The name of the counter that measures events. + // The name can be "some_counter + some_other_counter", in which case the + // measured value is the sum of events on these counters. string Counter = counter; } -// Each ProcResourceUnits can define how to measure issued uops by defining -// a PfmIssueCounter. -class PfmIssueCounter counters> - : PfmCounter{ - // The resource units on which uops are issued. 
- ProcResourceUnits Resource = resource; - // The list of counters that measure issue events. - list Counters = counters; +// Issue counters can be tied to a ProcResource +class PfmIssueCounter + : PfmCounter { + // The name of the ProcResource on which uops are issued. This is used by + // llvm-exegesis to compare measurements with values in the SchedModels. + // If the CPU has a sched model, this should correspond to the name of a + // ProcResource. + string ResourceName = resource_name; } -// Each processor can define how to measure NumMicroOps by defining a -// PfmUopsCounter. -class PfmUopsCounter : PfmCounter { - string Counter = counter; +def NoPfmCounter : PfmCounter <""> {} + +class ProcPfmCounters { + // Processors can define how to measure cycles by defining a CycleCounter. + PfmCounter CycleCounter = NoPfmCounter; + // Processors can define how to measure uops by defining a UopsCounter. + PfmCounter UopsCounter = NoPfmCounter; + // Processors can define how to measure issued uops by defining IssueCounters. 
+ list IssueCounters = []; } +def NoProcPfmCounters : ProcPfmCounters {} Index: lib/CodeGen/TargetSubtargetInfo.cpp =================================================================== --- lib/CodeGen/TargetSubtargetInfo.cpp +++ lib/CodeGen/TargetSubtargetInfo.cpp @@ -26,11 +26,12 @@ TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef FS, ArrayRef PF, ArrayRef PD, - const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, - const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) - : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { -} + const SubtargetInfoKV *ProcSched, const SubtargetInfoKV *ProcPfm, + const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, + const unsigned *FP) + : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, ProcPfm, WPR, WL, RA, IS, + OC, FP) {} TargetSubtargetInfo::~TargetSubtargetInfo() = default; Index: lib/MC/MCSchedule.cpp =================================================================== --- lib/MC/MCSchedule.cpp +++ lib/MC/MCSchedule.cpp @@ -21,7 +21,7 @@ using namespace llvm; static_assert(std::is_pod::value, - "We shouldn't have a static constructor here"); + "We shouldn't have dynamic initialization here"); const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth, DefaultMicroOpBufferSize, DefaultLoopMicroOpBufferSize, @@ -150,3 +150,8 @@ // that it can execute at the maximum default issue width. 
return 1.0 / DefaultIssueWidth; } + +static_assert(std::is_pod::value, + "We shouldn't have dynamic initialization here"); +const MCPfmCountersInfo MCPfmCountersInfo::Default = {nullptr, nullptr, + nullptr}; Index: lib/MC/MCSubtargetInfo.cpp =================================================================== --- lib/MC/MCSubtargetInfo.cpp +++ lib/MC/MCSubtargetInfo.cpp @@ -29,10 +29,13 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures); - if (!CPU.empty()) + if (!CPU.empty()) { CPUSchedModel = &getSchedModelForCPU(CPU); - else + CPUPfmCounters = &getPfmCountersForCPU(CPU); + } else { CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); + CPUPfmCounters = &MCPfmCountersInfo::getDefault(); + } } void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) { @@ -42,12 +45,14 @@ MCSubtargetInfo::MCSubtargetInfo( const Triple &TT, StringRef C, StringRef FS, ArrayRef PF, ArrayRef PD, - const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, - const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) + const SubtargetInfoKV *ProcSched, const SubtargetInfoKV *ProcPfm, + const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, + const unsigned *FP) : TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD), - ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL), - ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) { + ProcSchedModels(ProcSched), ProcPfmCounters(ProcPfm), + WriteProcResTable(WPR), WriteLatencyTable(WL), ReadAdvanceTable(RA), + Stages(IS), OperandCycles(OC), ForwardingPaths(FP) { InitMCProcessorInfo(CPU, FS); } @@ -83,29 +88,40 @@ return (FeatureBits & All) == Set; } -const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { - 
assert(ProcSchedModels && "Processor machine model not available!"); - - ArrayRef SchedModels(ProcSchedModels, ProcDesc.size()); - - assert(std::is_sorted(SchedModels.begin(), SchedModels.end(), - [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { - return strcmp(LHS.Key, RHS.Key) < 0; - }) && - "Processor machine model table is not sorted"); +template +static const ValueT &getValueForCpu(StringRef CPU, + ArrayRef ProcTs) { + assert(std::is_sorted( + ProcTs.begin(), ProcTs.end(), + [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { + return strcmp(LHS.Key, RHS.Key) < 0; + }) && + "SubtargetInfoKV table is not sorted"); // Find entry - auto Found = - std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU); - if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) { + auto Found = std::lower_bound(ProcTs.begin(), ProcTs.end(), CPU); + if (Found == ProcTs.end() || StringRef(Found->Key) != CPU) { if (CPU != "help") // Don't error if the user asked for help. 
errs() << "'" << CPU << "' is not a recognized processor for this target" << " (ignoring processor)\n"; - return MCSchedModel::GetDefaultSchedModel(); + return GetDefault(); } - assert(Found->Value && "Missing processor SchedModel value"); - return *(const MCSchedModel *)Found->Value; + assert(Found->Value && "Missing processor value"); + return *(const ValueT *)Found->Value; +} + +const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { + assert(ProcSchedModels && "Processor machine model not available!"); + return getValueForCpu( + CPU, ArrayRef(ProcSchedModels, ProcDesc.size())); +} + +const MCPfmCountersInfo & +MCSubtargetInfo::getPfmCountersForCPU(StringRef CPU) const { + assert(ProcPfmCounters && "Processor pfm counters not available!"); + return getValueForCpu( + CPU, ArrayRef(ProcPfmCounters, ProcDesc.size())); } InstrItineraryData Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -411,6 +411,12 @@ include "X86RegisterInfo.td" include "X86RegisterBanks.td" +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "X86PfmCounters.td" + //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// @@ -463,7 +469,7 @@ "IntelIcelakeServer", "Intel Icelake Server processors">; class Proc Features> - : ProcessorModel; + : ProcessorModel; def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; @@ -613,8 +619,10 @@ class ProcModel ProcFeatures, - list OtherFeatures> : - ProcessorModel; + list OtherFeatures, + ProcPfmCounters counters = NoProcPfmCounters> : + ProcessorModel; def GLMFeatures : ProcessorFeatures<[], [ FeatureX87, @@ 
-739,7 +747,7 @@ SNBFeatures.Value, [ FeatureSlowUAMem32, FeaturePOPCNTFalseDeps -]>; +], SandyBridgePfmCounters>; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. @@ -753,7 +761,7 @@ IVBFeatures.Value, [ FeatureSlowUAMem32, FeaturePOPCNTFalseDeps -]>; +], SandyBridgePfmCounters>; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. @@ -774,7 +782,7 @@ ProcIntelHSW, FeaturePOPCNTFalseDeps, FeatureLZCNTFalseDeps -]>; +], HaswellPfmCounters>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. @@ -788,7 +796,7 @@ ProcIntelBDW, FeaturePOPCNTFalseDeps, FeatureLZCNTFalseDeps -]>; +], BroadwellPfmCounters>; def : BroadwellProc<"broadwell">; def SKLFeatures : ProcessorFeatures; +], SkylakeClientPfmCounters>; def : SkylakeClientProc<"skylake">; def KNLFeatures : ProcessorFeatures; +], SkylakeServerPfmCounters>; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. 
@@ -1248,9 +1256,3 @@ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; let AllowRegisterRenaming = 1; } - -//===----------------------------------------------------------------------===// -// Pfm Counters -//===----------------------------------------------------------------------===// - -include "X86PfmCounters.td" Index: lib/Target/X86/X86PfmCounters.td =================================================================== --- lib/Target/X86/X86PfmCounters.td +++ lib/Target/X86/X86PfmCounters.td @@ -11,73 +11,86 @@ // //===----------------------------------------------------------------------===// -let SchedModel = SandyBridgeModel in { -def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def SBPort0Counter : PfmIssueCounter; -def SBPort1Counter : PfmIssueCounter; -def SBPort23Counter : PfmIssueCounter; -def SBPort4Counter : PfmIssueCounter; -def SBPort5Counter : PfmIssueCounter; -def SBUopsCounter : PfmUopsCounter<"uops_issued:any">; +def UnhaltedCoreCyclesPfmCounter : PfmCounter<"unhalted_core_cycles">; +def UopsIssuedPfmCounter : PfmCounter<"uops_issued:any">; + +def SandyBridgePfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"SBPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"SBPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"SBPort23", "uops_dispatched_port:port_2 + uops_dispatched_port:port_3">, + PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5"> + ]; } -let SchedModel = HaswellModel in { -def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def HWPort0Counter : PfmIssueCounter; -def HWPort1Counter : PfmIssueCounter; -def HWPort2Counter : PfmIssueCounter; -def HWPort3Counter : PfmIssueCounter; -def HWPort4Counter : PfmIssueCounter; -def HWPort5Counter : PfmIssueCounter; -def HWPort6Counter : PfmIssueCounter; -def HWPort7Counter : 
PfmIssueCounter; -def HWUopsCounter : PfmUopsCounter<"uops_issued:any">; +def HaswellPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"HWPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"HWPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"HWPort2", "uops_dispatched_port:port_2">, + PfmIssueCounter<"HWPort3", "uops_dispatched_port:port_3">, + PfmIssueCounter<"HWPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"HWPort5", "uops_dispatched_port:port_5">, + PfmIssueCounter<"HWPort6", "uops_dispatched_port:port_6">, + PfmIssueCounter<"HWPort7", "uops_dispatched_port:port_7"> + ]; } -let SchedModel = BroadwellModel in { -def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def BWPort0Counter : PfmIssueCounter; -def BWPort1Counter : PfmIssueCounter; -def BWPort2Counter : PfmIssueCounter; -def BWPort3Counter : PfmIssueCounter; -def BWPort4Counter : PfmIssueCounter; -def BWPort5Counter : PfmIssueCounter; -def BWPort6Counter : PfmIssueCounter; -def BWPort7Counter : PfmIssueCounter; -def BWUopsCounter : PfmUopsCounter<"uops_issued:any">; +def BroadwellPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"BWPort0", "uops_executed_port:port_0">, + PfmIssueCounter<"BWPort1", "uops_executed_port:port_1">, + PfmIssueCounter<"BWPort2", "uops_executed_port:port_2">, + PfmIssueCounter<"BWPort3", "uops_executed_port:port_3">, + PfmIssueCounter<"BWPort4", "uops_executed_port:port_4">, + PfmIssueCounter<"BWPort5", "uops_executed_port:port_5">, + PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">, + PfmIssueCounter<"BWPort7", "uops_executed_port:port_7"> + ]; } -let SchedModel = SkylakeClientModel in { -def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def SKLPort0Counter : PfmIssueCounter; -def 
SKLPort1Counter : PfmIssueCounter; -def SKLPort2Counter : PfmIssueCounter; -def SKLPort3Counter : PfmIssueCounter; -def SKLPort4Counter : PfmIssueCounter; -def SKLPort5Counter : PfmIssueCounter; -def SKLPort6Counter : PfmIssueCounter; -def SKLPort7Counter : PfmIssueCounter; -def SKLUopsCounter : PfmUopsCounter<"uops_issued:any">; +def SkylakeClientPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"SKLPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"SKLPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"SKLPort2", "uops_dispatched_port:port_2">, + PfmIssueCounter<"SKLPort3", "uops_dispatched_port:port_3">, + PfmIssueCounter<"SKLPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"SKLPort5", "uops_dispatched_port:port_5">, + PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">, + PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7"> + ]; } -let SchedModel = SkylakeServerModel in { -def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def SKXPort0Counter : PfmIssueCounter; -def SKXPort1Counter : PfmIssueCounter; -def SKXPort2Counter : PfmIssueCounter; -def SKXPort3Counter : PfmIssueCounter; -def SKXPort4Counter : PfmIssueCounter; -def SKXPort5Counter : PfmIssueCounter; -def SKXPort6Counter : PfmIssueCounter; -def SKXPort7Counter : PfmIssueCounter; -def SKXUopsCounter : PfmUopsCounter<"uops_issued:any">; +def SkylakeServerPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"SKXPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"SKXPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"SKXPort2", "uops_dispatched_port:port_2">, + PfmIssueCounter<"SKXPort3", "uops_dispatched_port:port_3">, + PfmIssueCounter<"SKXPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"SKXPort5", 
"uops_dispatched_port:port_5">, + PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">, + PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7"> + ]; } -let SchedModel = BtVer2Model in { -def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">; -def JUopsCounter : PfmUopsCounter<"retired_uops">; -def JFPU0Counter : PfmIssueCounter; -def JFPU1Counter : PfmIssueCounter; +def BtVer2PfmCounters : ProcPfmCounters { + let CycleCounter = PfmCounter<"cpu_clk_unhalted">; + let UopsCounter = PfmCounter<"retired_uops">; + let IssueCounters = [ + PfmIssueCounter<"JFPU0", "dispatched_fpu:pipe0">, + PfmIssueCounter<"JFPU1", "dispatched_fpu:pipe1"> + ]; } Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -94,12 +94,8 @@ } const char *LatencyBenchmarkRunner::getCounterName() const { - if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo()) - llvm::report_fatal_error("sched model is missing extra processor info!"); - const char *CounterName = State.getSubtargetInfo() - .getSchedModel() - .getExtraProcessorInfo() - .PfmCounters.CycleCounter; + const char *CounterName = + State.getSubtargetInfo().getPfmCounters().CycleCounter; if (!CounterName) llvm::report_fatal_error("sched model does not define a cycle counter"); return CounterName; Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -253,20 +253,18 @@ std::vector UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, ScratchSpace &Scratch) const { - const auto &SchedModel = State.getSubtargetInfo().getSchedModel(); - - const auto RunMeasurement = [&Function, - &Scratch](const char *const Counters) { + const auto RunMeasurement = [&Function, &Scratch](const char *const Counter) { // We sum counts when there are 
several counters for a single ProcRes // (e.g. P23 on SandyBridge). int64_t CounterValue = 0; llvm::SmallVector CounterNames; - llvm::StringRef(Counters).split(CounterNames, ','); - for (const auto &CounterName : CounterNames) { + llvm::StringRef(Counter).split(CounterNames, '+'); + for (auto CounterName : CounterNames) { + CounterName = CounterName.trim(); pfm::PerfEvent UopPerfEvent(CounterName); if (!UopPerfEvent.valid()) llvm::report_fatal_error( - llvm::Twine("invalid perf event ").concat(Counters)); + llvm::Twine("invalid perf event ").concat(Counter)); pfm::Counter Counter(UopPerfEvent); Scratch.clear(); Counter.start(); @@ -278,19 +276,19 @@ }; std::vector Result; - const auto &PfmCounters = SchedModel.getExtraProcessorInfo().PfmCounters; + const llvm::MCPfmCountersInfo &PCI = + State.getSubtargetInfo().getPfmCounters(); // Uops per port. - for (unsigned ProcResIdx = 1; - ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) { - const char *const Counters = PfmCounters.IssueCounters[ProcResIdx]; - if (!Counters) - continue; - const double CounterValue = RunMeasurement(Counters); - Result.push_back(BenchmarkMeasure::Create( - SchedModel.getProcResource(ProcResIdx)->Name, CounterValue)); + for (const auto *IssueCounter = PCI.IssueCounters, + *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters; + IssueCounter != IssueCounterEnd; ++IssueCounter) { + const double CounterValue = + IssueCounter->Counter ? RunMeasurement(IssueCounter->Counter) : 0.0; + Result.push_back( + BenchmarkMeasure::Create(IssueCounter->ProcResName, CounterValue)); } // NumMicroOps. 
- if (const char *const UopsCounter = PfmCounters.UopsCounter) { + if (const char *const UopsCounter = PCI.UopsCounter) { const double CounterValue = RunMeasurement(UopsCounter); Result.push_back(BenchmarkMeasure::Create("NumMicroOps", CounterValue)); } Index: unittests/CodeGen/MachineInstrTest.cpp =================================================================== --- unittests/CodeGen/MachineInstrTest.cpp +++ unittests/CodeGen/MachineInstrTest.cpp @@ -48,7 +48,8 @@ public: BogusSubtarget(TargetMachine &TM) : TargetSubtargetInfo(Triple(""), "", "", {}, {}, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr), + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr), FL(), TL(TM) {} ~BogusSubtarget() override {} Index: utils/TableGen/CodeGenSchedule.h =================================================================== --- utils/TableGen/CodeGenSchedule.h +++ utils/TableGen/CodeGenSchedule.h @@ -239,11 +239,6 @@ // Optional Retire Control Unit definition. Record *RetireControlUnit; - // List of PfmCounters. - RecVec PfmIssueCounterDefs; - Record *PfmCycleCounterDef = nullptr; - Record *PfmUopsCounterDef = nullptr; - CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef, Record *IDef) : Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef), @@ -258,10 +253,7 @@ } bool hasExtraProcessorInfo() const { - return RetireControlUnit || !RegisterFiles.empty() || - !PfmIssueCounterDefs.empty() || - PfmCycleCounterDef != nullptr || - PfmUopsCounterDef != nullptr; + return RetireControlUnit || !RegisterFiles.empty(); } unsigned getProcResourceIdx(Record *PRDef) const; @@ -439,6 +431,9 @@ std::vector STIPredicates; + // Table of counter name -> counter index. 
+ std::map PfmCounterNameTable; + public: CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT); @@ -462,6 +457,10 @@ return make_range(classes_begin(), classes_begin() + NumInstrSchedClasses); } + const std::map &getPfmCounterNameTable() const { + return PfmCounterNameTable; + } + Record *getModelOrItinDef(Record *ProcDef) const { Record *ModelDef = ProcDef->getValueAsDef("SchedModel"); Record *ItinsDef = ProcDef->getValueAsDef("ProcItin"); Index: utils/TableGen/CodeGenSchedule.cpp =================================================================== --- utils/TableGen/CodeGenSchedule.cpp +++ utils/TableGen/CodeGenSchedule.cpp @@ -1772,30 +1772,33 @@ } } -// Collect all the RegisterFile definitions available in this target. +// Collect all the ProcPfmCounters definitions available in this target. void CodeGenSchedModels::collectPfmCounters() { - for (Record *Def : Records.getAllDerivedDefinitions("PfmIssueCounter")) { - CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); - PM.PfmIssueCounterDefs.emplace_back(Def); - } - for (Record *Def : Records.getAllDerivedDefinitions("PfmCycleCounter")) { - CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); - if (PM.PfmCycleCounterDef) { - PrintFatalError(Def->getLoc(), - "multiple cycle counters for " + - Def->getValueAsDef("SchedModel")->getName()); - } - PM.PfmCycleCounterDef = Def; - } - for (Record *Def : Records.getAllDerivedDefinitions("PfmUopsCounter")) { - CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); - if (PM.PfmUopsCounterDef) { - PrintFatalError(Def->getLoc(), - "multiple uops counters for " + - Def->getValueAsDef("SchedModel")->getName()); - } - PM.PfmUopsCounterDef = Def; - } + const auto AddPfmCounterName = [this](const Record *PfmCounterDef) { + const llvm::StringRef Counter = PfmCounterDef->getValueAsString("Counter"); + if (!Counter.empty()) + PfmCounterNameTable.emplace(Counter, 0); + }; + for (Record *Def : 
Records.getAllDerivedDefinitions("ProcPfmCounters")) { + // Check that ResourceNames are unique. + llvm::SmallSet Seen; + for (const Record *IssueCounter : + Def->getValueAsListOfDefs("IssueCounters")) { + const llvm::StringRef ResourceName = + IssueCounter->getValueAsString("ResourceName"); + if (ResourceName.empty()) + PrintFatalError(IssueCounter->getLoc(), "invalid empty ResourceName"); + if (!Seen.insert(ResourceName).second) + PrintFatalError(IssueCounter->getLoc(), + "duplicate ResourceName " + ResourceName); + AddPfmCounterName(IssueCounter); + } + AddPfmCounterName(Def->getValueAsDef("CycleCounter")); + AddPfmCounterName(Def->getValueAsDef("UopsCounter")); + } + unsigned Index = 0; + for (auto &NameAndIndex : PfmCounterNameTable) + NameAndIndex.second = Index++; } // Collect and sort WriteRes, ReadAdvance, and ProcResources. Index: utils/TableGen/SubtargetEmitter.cpp =================================================================== --- utils/TableGen/SubtargetEmitter.cpp +++ utils/TableGen/SubtargetEmitter.cpp @@ -95,6 +95,7 @@ raw_ostream &OS); void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS); + void EmitPfmCounters(raw_ostream &OS); void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name, char Separator); void EmitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel, @@ -693,80 +694,12 @@ return CostTblIndex; } -static bool EmitPfmIssueCountersTable(const CodeGenProcModel &ProcModel, - raw_ostream &OS) { - unsigned NumCounterDefs = 1 + ProcModel.ProcResourceDefs.size(); - std::vector CounterDefs(NumCounterDefs); - bool HasCounters = false; - for (const Record *CounterDef : ProcModel.PfmIssueCounterDefs) { - const Record *&CD = CounterDefs[ProcModel.getProcResourceIdx( - CounterDef->getValueAsDef("Resource"))]; - if (CD) { - PrintFatalError(CounterDef->getLoc(), - "multiple issue counters for " + - CounterDef->getValueAsDef("Resource")->getName()); - } - CD = CounterDef; - HasCounters = true; - } - 
if (!HasCounters) { - return false; - } - OS << "\nstatic const char* " << ProcModel.ModelName - << "PfmIssueCounters[] = {\n"; - for (unsigned i = 0; i != NumCounterDefs; ++i) { - const Record *CounterDef = CounterDefs[i]; - if (CounterDef) { - const auto PfmCounters = CounterDef->getValueAsListOfStrings("Counters"); - if (PfmCounters.empty()) - PrintFatalError(CounterDef->getLoc(), "empty counter list"); - OS << " \"" << PfmCounters[0]; - for (unsigned p = 1, e = PfmCounters.size(); p != e; ++p) - OS << ",\" \"" << PfmCounters[p]; - OS << "\", // #" << i << " = "; - OS << CounterDef->getValueAsDef("Resource")->getName() << "\n"; - } else { - OS << " nullptr, // #" << i << "\n"; - } - } - OS << "};\n"; - return true; -} - -static void EmitPfmCounters(const CodeGenProcModel &ProcModel, - const bool HasPfmIssueCounters, raw_ostream &OS) { - OS << " {\n"; - // Emit the cycle counter. - if (ProcModel.PfmCycleCounterDef) - OS << " \"" << ProcModel.PfmCycleCounterDef->getValueAsString("Counter") - << "\", // Cycle counter.\n"; - else - OS << " nullptr, // No cycle counter.\n"; - - // Emit the uops counter. - if (ProcModel.PfmUopsCounterDef) - OS << " \"" << ProcModel.PfmUopsCounterDef->getValueAsString("Counter") - << "\", // Uops counter.\n"; - else - OS << " nullptr, // No uops counter.\n"; - - // Emit a reference to issue counters table. - if (HasPfmIssueCounters) - OS << " " << ProcModel.ModelName << "PfmIssueCounters\n"; - else - OS << " nullptr // No issue counters.\n"; - OS << " }\n"; -} - void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS) { // Generate a table of register file descriptors (one entry per each user // defined register file), and a table of register costs. unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS); - // Generate a table of ProcRes counter names. - const bool HasPfmIssueCounters = EmitPfmIssueCountersTable(ProcModel, OS); - // Now generate a table for the extra processor info. 
OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName << "ExtraInfo = {\n "; @@ -779,9 +712,83 @@ EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(), NumCostEntries, OS); - EmitPfmCounters(ProcModel, HasPfmIssueCounters, OS); + OS << "};\n"; +} + +static void +EmitPfmCountersInfo(const Record &Def, + std::map PfmCounterNameTable, + llvm::StringRef Target, unsigned &IssueCountersTableOffset, + raw_ostream &OS) { + const auto CycleCounter = + Def.getValueAsDef("CycleCounter")->getValueAsString("Counter"); + const auto UopsCounter = + Def.getValueAsDef("UopsCounter")->getValueAsString("Counter"); + const size_t NumIssueCounters = + Def.getValueAsListOfDefs("IssueCounters").size(); + + // This is the default, do not emit. + if (CycleCounter.empty() && UopsCounter.empty() && NumIssueCounters == 0) + return; + + OS << "\nstatic const llvm::MCPfmCountersInfo " << Target << Def.getName() + << " = {\n"; + + // Cycle Counter. + if (CycleCounter.empty()) + OS << " nullptr, // No cycle counter.\n"; + else + OS << " " << Target << "PfmCounterNames[" + << PfmCounterNameTable[CycleCounter] << "], // Cycle counter\n"; + + // Uops Counter. + if (UopsCounter.empty()) + OS << " nullptr, // No uops counter.\n"; + else + OS << " " << Target << "PfmCounterNames[" + << PfmCounterNameTable[UopsCounter] << "], // Uops counter\n"; + + // Issue Counters + if (NumIssueCounters == 0) + OS << " nullptr, // No issue counters.\n 0\n"; + else + OS << " " << Target << "PfmIssueCounters + " << IssueCountersTableOffset + << ", " << NumIssueCounters << " // Issue counters.\n"; OS << "};\n"; + IssueCountersTableOffset += NumIssueCounters; +} + +void SubtargetEmitter::EmitPfmCounters(raw_ostream &OS) { + // Emit the counter name table. 
+ const auto &PfmCounterNameTable = SchedModels.getPfmCounterNameTable(); + OS << "\nstatic const char* " << Target << "PfmCounterNames[] = {\n"; + for (const auto &NameAndIndex : PfmCounterNameTable) + OS << " \"" << NameAndIndex.first << "\", // " << NameAndIndex.second + << "\n"; + OS << "};\n\n"; + + // Emit the IssueCounters table. + const RecVec PfmCounterDefs = + Records.getAllDerivedDefinitions("ProcPfmCounters"); + OS << "static const llvm::MCPfmCountersInfo::IssueCounter " << Target + << "PfmIssueCounters[] = {\n"; + for (const Record *Def : PfmCounterDefs) { + for (const Record *ICDef : Def->getValueAsListOfDefs("IssueCounters")) + OS << " { " << Target << "PfmCounterNames[" + << PfmCounterNameTable.find(ICDef->getValueAsString("Counter"))->second + << "], \"" << ICDef->getValueAsString("ResourceName") << "\"},\n"; + } + + OS << "};\n"; + + // Now generate the MCPfmCountersInfos. + unsigned IssueCountersTableOffset = 0; + for (const Record *Def : PfmCounterDefs) + EmitPfmCountersInfo(*Def, PfmCounterNameTable, Target, + IssueCountersTableOffset, OS); + + OS << "\n"; } void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, @@ -1406,7 +1413,8 @@ } // -// EmitProcessorLookup - generate cpu name to itinerary lookup table. +// EmitProcessorLookup - generate cpu name to sched model and cpu name to pfm +// counters lookup tables. 
// void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) { // Gather and sort processor information @@ -1414,12 +1422,11 @@ Records.getAllDerivedDefinitions("Processor"); llvm::sort(ProcessorList, LessRecordFieldName()); - // Begin processor table + // Begin processor->sched model table OS << "\n"; - OS << "// Sorted (by key) array of itineraries for CPU subtype.\n" - << "extern const llvm::SubtargetInfoKV " - << Target << "ProcSchedKV[] = {\n"; - + OS << "// Sorted (by key) array of sched model for CPU subtype.\n" + << "extern const llvm::SubtargetInfoKV " << Target + << "ProcSchedKV[] = {\n"; // For each processor for (Record *Processor : ProcessorList) { StringRef Name = Processor->getValueAsString("Name"); @@ -1429,8 +1436,33 @@ // Emit as { "cpu", procinit }, OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " },\n"; } + // End processor->sched model table + OS << "};\n"; - // End processor table + // Begin processor->pfm counters table + OS << "\n"; + OS << "// Sorted (by key) array of pfm counters for CPU subtype.\n" + << "extern const llvm::SubtargetInfoKV " << Target << "ProcPfmKV[] = {\n"; + // For each processor + for (Record *Processor : ProcessorList) { + const Record *Def = Processor->getValueAsDef("PfmCounters"); + const auto CycleCounter = + Def->getValueAsDef("CycleCounter")->getValueAsString("Counter"); + const auto UopsCounter = + Def->getValueAsDef("UopsCounter")->getValueAsString("Counter"); + const size_t NumIssueCounters = + Def->getValueAsListOfDefs("IssueCounters").size(); + + // Emit as { "cpu", pfmcounters }, + OS << " { \"" << Processor->getValueAsString("Name") + << "\", (const void *)&"; + if (CycleCounter.empty() && UopsCounter.empty() && NumIssueCounters == 0) + OS << "MCPfmCountersInfo::getDefault()"; + else + OS << Target << Def->getName(); + OS << " },\n"; + } + // End processor->pfm counters table OS << "};\n"; } @@ -1447,6 +1479,8 @@ << "#define DBGFIELD(x)\n" << "#endif\n"; + EmitPfmCounters(OS); + if 
(SchedModels.hasItineraries()) { std::vector> ProcItinLists; // Emit the stage data @@ -1671,7 +1705,7 @@ // Emit target predicates. emitSchedModelHelpersImpl(OS); - + OS << "} // " << ClassName << "::resolveSchedClass\n\n"; OS << "unsigned " << ClassName @@ -1768,11 +1802,12 @@ << " StringRef CPU, StringRef FS, ArrayRef PF,\n" << " ArrayRef PD,\n" << " const SubtargetInfoKV *ProcSched,\n" + << " const SubtargetInfoKV *ProcPfm,\n" << " const MCWriteProcResEntry *WPR,\n" << " const MCWriteLatencyEntry *WL,\n" << " const MCReadAdvanceEntry *RA, const InstrStage *IS,\n" << " const unsigned *OC, const unsigned *FP) :\n" - << " MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched,\n" + << " MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, ProcPfm,\n" << " WPR, WL, RA, IS, OC, FP) { }\n\n" << " unsigned resolveVariantSchedClass(unsigned SchedClass,\n" << " const MCInst *MI, unsigned CPUID) const override {\n" @@ -1854,10 +1889,9 @@ else OS << "None, "; OS << '\n'; OS.indent(22); - OS << Target << "ProcSchedKV, " - << Target << "WriteProcResTable, " - << Target << "WriteLatencyTable, " - << Target << "ReadAdvanceTable, "; + OS << Target << "ProcSchedKV, " << Target << "ProcPfmKV, " << Target + << "WriteProcResTable, " << Target << "WriteLatencyTable, " << Target + << "ReadAdvanceTable, "; OS << '\n'; OS.indent(22); if (SchedModels.hasItineraries()) { OS << Target << "Stages, " @@ -1923,6 +1957,7 @@ OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n"; OS << "extern const llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n"; OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcSchedKV[];\n"; + OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcPfmKV[];\n"; OS << "extern const llvm::MCWriteProcResEntry " << Target << "WriteProcResTable[];\n"; OS << "extern const llvm::MCWriteLatencyEntry " @@ -1948,10 +1983,9 @@ else OS << "None, "; OS << '\n'; OS.indent(24); - OS << Target << "ProcSchedKV, " - << Target << "WriteProcResTable, " - << 
Target << "WriteLatencyTable, " - << Target << "ReadAdvanceTable, "; + OS << Target << "ProcSchedKV, " << Target << "ProcPfmKV, " << Target + << "WriteProcResTable, " << Target << "WriteLatencyTable, " << Target + << "ReadAdvanceTable, "; OS << '\n'; OS.indent(24); if (SchedModels.hasItineraries()) { OS << Target << "Stages, "