Index: include/llvm/CodeGen/TargetSubtargetInfo.h =================================================================== --- include/llvm/CodeGen/TargetSubtargetInfo.h +++ include/llvm/CodeGen/TargetSubtargetInfo.h @@ -65,6 +65,7 @@ ArrayRef PF, ArrayRef PD, const SubtargetInfoKV *ProcSched, + const SubtargetInfoKV *ProcPfm, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, @@ -162,7 +163,7 @@ /// /// Similar in behavior to `isZeroIdiom`. However, it knows how to identify /// all dependency breaking instructions (i.e. not just zero-idioms). - /// + /// /// As for `isZeroIdiom`, this method returns a mask of "broken" dependencies. /// (See method `isZeroIdiom` for a detailed description of Mask). virtual bool isDependencyBreaking(const MachineInstr *MI, APInt &Mask) const { Index: include/llvm/MC/MCSchedule.h =================================================================== --- include/llvm/MC/MCSchedule.h +++ include/llvm/MC/MCSchedule.h @@ -176,22 +176,31 @@ unsigned NumRegisterFiles; const MCRegisterCostEntry *RegisterCostTable; unsigned NumRegisterCostEntries; +}; - struct PfmCountersInfo { - // An optional name of a performance counter that can be used to measure - // cycles. - const char *CycleCounter; - - // An optional name of a performance counter that can be used to measure - // uops. - const char *UopsCounter; - - // For each MCProcResourceDesc defined by the processor, an optional list of - // names of performance counters that can be used to measure the resource - // utilization. - const char **IssueCounters; +struct MCPfmCountersInfo { + // An optional name of a performance counter that can be used to measure + // cycles. + const char *const CycleCounter; + + // An optional name of a performance counter that can be used to measure + // uops. + const char *const UopsCounter; + + // An IssueCounter specifies how to measure uops issued to specific proc + // resources. 
+ struct IssueCounter { + const char *const Counter; + // The name of the ProcResource that this counter measures. + const char *const ProcResName; }; - PfmCountersInfo PfmCounters; + // An optional list of IssueCounters. + const IssueCounter *const IssueCounters; + const unsigned NumIssueCounters; + + /// Returns the default initialized pfm counters. + static const MCPfmCountersInfo &getDefault() { return Default; } + static const MCPfmCountersInfo Default; }; /// Machine model for scheduling, bundling, and heuristics. Index: include/llvm/MC/MCSubtargetInfo.h =================================================================== --- include/llvm/MC/MCSubtargetInfo.h +++ include/llvm/MC/MCSubtargetInfo.h @@ -41,10 +41,12 @@ // Scheduler machine model const SubtargetInfoKV *ProcSchedModels; + const SubtargetInfoKV *ProcPfmCounters; const MCWriteProcResEntry *WriteProcResTable; const MCWriteLatencyEntry *WriteLatencyTable; const MCReadAdvanceEntry *ReadAdvanceTable; const MCSchedModel *CPUSchedModel; + const MCPfmCountersInfo *CPUPfmCounters; const InstrStage *Stages; // Instruction itinerary stages const unsigned *OperandCycles; // Itinerary operand cycles @@ -57,6 +59,7 @@ ArrayRef PF, ArrayRef PD, const SubtargetInfoKV *ProcSched, + const SubtargetInfoKV *ProcPfm, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, const unsigned *FP); @@ -115,6 +118,12 @@ /// Get the machine model for this subtarget's CPU. const MCSchedModel &getSchedModel() const { return *CPUSchedModel; } + /// Get the pfm counters of a CPU. + const MCPfmCountersInfo &getPfmCountersForCPU(StringRef CPU) const; + + /// Get the pfm counters for this subtarget's CPU. + const MCPfmCountersInfo &getPfmCounters() const { return *CPUPfmCounters; } + /// Return an iterator at the first process resource consumed by the given /// scheduling class. 
const MCWriteProcResEntry *getWriteProcResBegin( Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -1453,7 +1453,8 @@ // by the scheduler. Each Processor definition requires corresponding // instruction itineraries. // -class Processor f> { +class Processor f, + ProcPfmCounters counters = NoProcPfmCounters> { // Name - Chip set name. Used by command line (-mcpu=) to determine the // appropriate target chip. // @@ -1469,6 +1470,8 @@ // Features - list of list Features = f; + + ProcPfmCounters PfmCounters = counters; } // ProcessorModel allows subtargets to specify the more general @@ -1477,8 +1480,9 @@ // // Although this class always passes NoItineraries to the Processor // class, the SchedMachineModel may still define valid Itineraries. -class ProcessorModel f> - : Processor { +class ProcessorModel f, + ProcPfmCounters counters = NoProcPfmCounters> + : Processor { let SchedModel = m; } Index: include/llvm/Target/TargetSchedule.td =================================================================== --- include/llvm/Target/TargetSchedule.td +++ include/llvm/Target/TargetSchedule.td @@ -531,29 +531,32 @@ } // Allow the definition of hardware counters. -class PfmCounter { - SchedMachineModel SchedModel = ?; -} - -// Each processor can define how to measure cycles by defining a -// PfmCycleCounter. -class PfmCycleCounter : PfmCounter { +class PfmCounter { + // The name of the counter that measures events. + // The name can be "some_counter + some_other_counter", in which case the + // measured value is the sum of events on these counters. string Counter = counter; } -// Each ProcResourceUnits can define how to measure issued uops by defining -// a PfmIssueCounter. -class PfmIssueCounter counters> - : PfmCounter{ - // The resource units on which uops are issued. 
- ProcResourceUnits Resource = resource; - // The list of counters that measure issue events. - list Counters = counters; +// Issue counters can be tied to a ProcResource +class PfmIssueCounter + : PfmCounter { + // The name of the ProcResource on which uops are issued. This is used by + // llvm-exegesis to compare measurements with values in the SchedModels. + // If the CPU has a sched model, this should correspond to the name of a + // ProcResource. + string ResourceName = resource_name; } -// Each processor can define how to measure NumMicroOps by defining a -// PfmUopsCounter. -class PfmUopsCounter : PfmCounter { - string Counter = counter; +def NoPfmCounter : PfmCounter <""> {} + +class ProcPfmCounters { + // Processors can define how to measure cycles by defining a CycleCounter. + PfmCounter CycleCounter = NoPfmCounter; + // Processors can define how to measure uops by defining a UopsCounter. + PfmCounter UopsCounter = NoPfmCounter; + // Processors can define how to measure issued uops by defining IssueCounters. 
+ list IssueCounters = []; } +def NoProcPfmCounters : ProcPfmCounters {} Index: lib/CodeGen/TargetSubtargetInfo.cpp =================================================================== --- lib/CodeGen/TargetSubtargetInfo.cpp +++ lib/CodeGen/TargetSubtargetInfo.cpp @@ -26,11 +26,12 @@ TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef FS, ArrayRef PF, ArrayRef PD, - const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, - const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) - : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { -} + const SubtargetInfoKV *ProcSched, const SubtargetInfoKV *ProcPfm, + const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, + const unsigned *FP) + : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, ProcPfm, WPR, WL, RA, IS, + OC, FP) {} TargetSubtargetInfo::~TargetSubtargetInfo() = default; Index: lib/MC/MCSchedule.cpp =================================================================== --- lib/MC/MCSchedule.cpp +++ lib/MC/MCSchedule.cpp @@ -85,9 +85,8 @@ llvm_unreachable("unsupported variant scheduling class"); } -double -MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI, - const MCSchedClassDesc &SCDesc) { +double MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI, + const MCSchedClassDesc &SCDesc) { Optional Throughput; const MCSchedModel &SM = STI.getSchedModel(); const MCWriteProcResEntry *I = STI.getWriteProcResBegin(&SCDesc); @@ -107,10 +106,9 @@ return ((double)SCDesc.NumMicroOps) / SM.IssueWidth; } -double -MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI, - const MCInstrInfo &MCII, - const MCInst &Inst) const { +double MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI, + const MCInstrInfo &MCII, + const MCInst &Inst) const { unsigned SchedClass = 
MCII.get(Inst.getOpcode()).getSchedClass(); const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass); @@ -131,9 +129,8 @@ llvm_unreachable("unsupported variant scheduling class"); } -double -MCSchedModel::getReciprocalThroughput(unsigned SchedClass, - const InstrItineraryData &IID) { +double MCSchedModel::getReciprocalThroughput(unsigned SchedClass, + const InstrItineraryData &IID) { Optional Throughput; const InstrStage *I = IID.beginStage(SchedClass); const InstrStage *E = IID.endStage(SchedClass); @@ -150,3 +147,8 @@ // that it can execute at the maximum default issue width. return 1.0 / DefaultIssueWidth; } + +static_assert(std::is_pod<MCPfmCountersInfo>::value, + "We shouldn't have a static constructor here"); +const MCPfmCountersInfo MCPfmCountersInfo::Default = {nullptr, nullptr, + nullptr, 0}; Index: lib/MC/MCSubtargetInfo.cpp =================================================================== --- lib/MC/MCSubtargetInfo.cpp +++ lib/MC/MCSubtargetInfo.cpp @@ -29,10 +29,13 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures); - if (!CPU.empty()) + if (!CPU.empty()) { CPUSchedModel = &getSchedModelForCPU(CPU); - else + CPUPfmCounters = &getPfmCountersForCPU(CPU); + } else { CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); + CPUPfmCounters = &MCPfmCountersInfo::getDefault(); + } } void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) { @@ -42,12 +45,14 @@ MCSubtargetInfo::MCSubtargetInfo( const Triple &TT, StringRef C, StringRef FS, ArrayRef PF, ArrayRef PD, - const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, - const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) + const SubtargetInfoKV *ProcSched, const SubtargetInfoKV *ProcPfm, + const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, + const unsigned *FP)
: TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD), - ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL), - ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) { + ProcSchedModels(ProcSched), ProcPfmCounters(ProcPfm), + WriteProcResTable(WPR), WriteLatencyTable(WL), ReadAdvanceTable(RA), + Stages(IS), OperandCycles(OC), ForwardingPaths(FP) { InitMCProcessorInfo(CPU, FS); } @@ -83,29 +88,40 @@ return (FeatureBits & All) == Set; } -const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { - assert(ProcSchedModels && "Processor machine model not available!"); - - ArrayRef SchedModels(ProcSchedModels, ProcDesc.size()); - - assert(std::is_sorted(SchedModels.begin(), SchedModels.end(), - [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { - return strcmp(LHS.Key, RHS.Key) < 0; - }) && - "Processor machine model table is not sorted"); +template +static const ValueT &getValueForCpu(StringRef CPU, + ArrayRef ProcTs) { + assert(std::is_sorted( + ProcTs.begin(), ProcTs.end(), + [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { + return strcmp(LHS.Key, RHS.Key) < 0; + }) && + "SubtargetInfoKV table is not sorted"); // Find entry - auto Found = - std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU); - if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) { + auto Found = std::lower_bound(ProcTs.begin(), ProcTs.end(), CPU); + if (Found == ProcTs.end() || StringRef(Found->Key) != CPU) { if (CPU != "help") // Don't error if the user asked for help. 
errs() << "'" << CPU << "' is not a recognized processor for this target" << " (ignoring processor)\n"; - return MCSchedModel::GetDefaultSchedModel(); + return GetDefault(); } - assert(Found->Value && "Missing processor SchedModel value"); - return *(const MCSchedModel *)Found->Value; + assert(Found->Value && "Missing processor value"); + return *(const ValueT *)Found->Value; +} + +const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { + assert(ProcSchedModels && "Processor machine model not available!"); + return getValueForCpu( + CPU, ArrayRef(ProcSchedModels, ProcDesc.size())); +} + +const MCPfmCountersInfo & +MCSubtargetInfo::getPfmCountersForCPU(StringRef CPU) const { + assert(ProcPfmCounters && "Processor pfm counters not available!"); + return getValueForCpu( + CPU, ArrayRef(ProcPfmCounters, ProcDesc.size())); } InstrItineraryData Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -411,6 +411,12 @@ include "X86RegisterInfo.td" include "X86RegisterBanks.td" +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "X86PfmCounters.td" + //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// @@ -463,7 +469,7 @@ "IntelIcelakeServer", "Intel Icelake Server processors">; class Proc Features> - : ProcessorModel; + : ProcessorModel; def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; @@ -613,8 +619,10 @@ class ProcModel ProcFeatures, - list OtherFeatures> : - ProcessorModel; + list OtherFeatures, + ProcPfmCounters counters = NoProcPfmCounters> : + ProcessorModel; def GLMFeatures : ProcessorFeatures<[], [ FeatureX87, @@
-739,7 +747,7 @@ SNBFeatures.Value, [ FeatureSlowUAMem32, FeaturePOPCNTFalseDeps -]>; +], SandyBridgePfmCounters>; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. @@ -753,7 +761,7 @@ IVBFeatures.Value, [ FeatureSlowUAMem32, FeaturePOPCNTFalseDeps -]>; +], SandyBridgePfmCounters>; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. @@ -774,7 +782,7 @@ ProcIntelHSW, FeaturePOPCNTFalseDeps, FeatureLZCNTFalseDeps -]>; +], HaswellPfmCounters>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. @@ -788,7 +796,7 @@ ProcIntelBDW, FeaturePOPCNTFalseDeps, FeatureLZCNTFalseDeps -]>; +], BroadwellPfmCounters>; def : BroadwellProc<"broadwell">; def SKLFeatures : ProcessorFeatures; +], SkylakeClientPfmCounters>; def : SkylakeClientProc<"skylake">; def KNLFeatures : ProcessorFeatures; +], SkylakeServerPfmCounters>; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. 
@@ -1248,9 +1256,3 @@ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; let AllowRegisterRenaming = 1; } - -//===----------------------------------------------------------------------===// -// Pfm Counters -//===----------------------------------------------------------------------===// - -include "X86PfmCounters.td" Index: lib/Target/X86/X86PfmCounters.td =================================================================== --- lib/Target/X86/X86PfmCounters.td +++ lib/Target/X86/X86PfmCounters.td @@ -11,73 +11,86 @@ // //===----------------------------------------------------------------------===// -let SchedModel = SandyBridgeModel in { -def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def SBPort0Counter : PfmIssueCounter; -def SBPort1Counter : PfmIssueCounter; -def SBPort23Counter : PfmIssueCounter; -def SBPort4Counter : PfmIssueCounter; -def SBPort5Counter : PfmIssueCounter; -def SBUopsCounter : PfmUopsCounter<"uops_issued:any">; +def UnhaltedCoreCyclesPfmCounter : PfmCounter<"unhalted_core_cycles">; +def UopsIssuedPfmCounter : PfmCounter<"uops_issued:any">; + +def SandyBridgePfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"SBPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"SBPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"SBPort23", "uops_dispatched_port:port_2 + uops_dispatched_port:port_3">, + PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5"> + ]; } -let SchedModel = HaswellModel in { -def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def HWPort0Counter : PfmIssueCounter; -def HWPort1Counter : PfmIssueCounter; -def HWPort2Counter : PfmIssueCounter; -def HWPort3Counter : PfmIssueCounter; -def HWPort4Counter : PfmIssueCounter; -def HWPort5Counter : PfmIssueCounter; -def HWPort6Counter : PfmIssueCounter; -def HWPort7Counter : 
PfmIssueCounter; -def HWUopsCounter : PfmUopsCounter<"uops_issued:any">; +def HaswellPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"HWPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"HWPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"HWPort2", "uops_dispatched_port:port_2">, + PfmIssueCounter<"HWPort3", "uops_dispatched_port:port_3">, + PfmIssueCounter<"HWPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"HWPort5", "uops_dispatched_port:port_5">, + PfmIssueCounter<"HWPort6", "uops_dispatched_port:port_6">, + PfmIssueCounter<"HWPort7", "uops_dispatched_port:port_7"> + ]; } -let SchedModel = BroadwellModel in { -def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def BWPort0Counter : PfmIssueCounter; -def BWPort1Counter : PfmIssueCounter; -def BWPort2Counter : PfmIssueCounter; -def BWPort3Counter : PfmIssueCounter; -def BWPort4Counter : PfmIssueCounter; -def BWPort5Counter : PfmIssueCounter; -def BWPort6Counter : PfmIssueCounter; -def BWPort7Counter : PfmIssueCounter; -def BWUopsCounter : PfmUopsCounter<"uops_issued:any">; +def BroadwellPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"BWPort0", "uops_executed_port:port_0">, + PfmIssueCounter<"BWPort1", "uops_executed_port:port_1">, + PfmIssueCounter<"BWPort2", "uops_executed_port:port_2">, + PfmIssueCounter<"BWPort3", "uops_executed_port:port_3">, + PfmIssueCounter<"BWPort4", "uops_executed_port:port_4">, + PfmIssueCounter<"BWPort5", "uops_executed_port:port_5">, + PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">, + PfmIssueCounter<"BWPort7", "uops_executed_port:port_7"> + ]; } -let SchedModel = SkylakeClientModel in { -def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def SKLPort0Counter : PfmIssueCounter; -def 
SKLPort1Counter : PfmIssueCounter; -def SKLPort2Counter : PfmIssueCounter; -def SKLPort3Counter : PfmIssueCounter; -def SKLPort4Counter : PfmIssueCounter; -def SKLPort5Counter : PfmIssueCounter; -def SKLPort6Counter : PfmIssueCounter; -def SKLPort7Counter : PfmIssueCounter; -def SKLUopsCounter : PfmUopsCounter<"uops_issued:any">; +def SkylakeClientPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"SKLPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"SKLPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"SKLPort2", "uops_dispatched_port:port_2">, + PfmIssueCounter<"SKLPort3", "uops_dispatched_port:port_3">, + PfmIssueCounter<"SKLPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"SKLPort5", "uops_dispatched_port:port_5">, + PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">, + PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7"> + ]; } -let SchedModel = SkylakeServerModel in { -def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">; -def SKXPort0Counter : PfmIssueCounter; -def SKXPort1Counter : PfmIssueCounter; -def SKXPort2Counter : PfmIssueCounter; -def SKXPort3Counter : PfmIssueCounter; -def SKXPort4Counter : PfmIssueCounter; -def SKXPort5Counter : PfmIssueCounter; -def SKXPort6Counter : PfmIssueCounter; -def SKXPort7Counter : PfmIssueCounter; -def SKXUopsCounter : PfmUopsCounter<"uops_issued:any">; +def SkylakeServerPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"SKXPort0", "uops_dispatched_port:port_0">, + PfmIssueCounter<"SKXPort1", "uops_dispatched_port:port_1">, + PfmIssueCounter<"SKXPort2", "uops_dispatched_port:port_2">, + PfmIssueCounter<"SKXPort3", "uops_dispatched_port:port_3">, + PfmIssueCounter<"SKXPort4", "uops_dispatched_port:port_4">, + PfmIssueCounter<"SKXPort5", 
"uops_dispatched_port:port_5">, + PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">, + PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7"> + ]; } -let SchedModel = BtVer2Model in { -def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">; -def JUopsCounter : PfmUopsCounter<"retired_uops">; -def JFPU0Counter : PfmIssueCounter; -def JFPU1Counter : PfmIssueCounter; +def BtVer2PfmCounters : ProcPfmCounters { + let CycleCounter = PfmCounter<"cpu_clk_unhalted">; + let UopsCounter = PfmCounter<"retired_uops">; + let IssueCounters = [ + PfmIssueCounter<"JFPU0", "dispatched_fpu:pipe0">, + PfmIssueCounter<"JFPU1", "dispatched_fpu:pipe1"> + ]; } Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -94,12 +94,8 @@ } const char *LatencyBenchmarkRunner::getCounterName() const { - if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo()) - llvm::report_fatal_error("sched model is missing extra processor info!"); - const char *CounterName = State.getSubtargetInfo() - .getSchedModel() - .getExtraProcessorInfo() - .PfmCounters.CycleCounter; + const char *CounterName = + State.getSubtargetInfo().getPfmCounters().CycleCounter; if (!CounterName) llvm::report_fatal_error("sched model does not define a cycle counter"); return CounterName; Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -253,20 +253,18 @@ std::vector UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, ScratchSpace &Scratch) const { - const auto &SchedModel = State.getSubtargetInfo().getSchedModel(); - - const auto RunMeasurement = [&Function, - &Scratch](const char *const Counters) { + const auto RunMeasurement = [&Function, &Scratch](const char *const Counter) { // We sum counts when there are 
several counters for a single ProcRes // (e.g. P23 on SandyBridge). int64_t CounterValue = 0; llvm::SmallVector CounterNames; - llvm::StringRef(Counters).split(CounterNames, ','); - for (const auto &CounterName : CounterNames) { + llvm::StringRef(Counter).split(CounterNames, '+'); + for (auto CounterName : CounterNames) { + CounterName = CounterName.trim(); pfm::PerfEvent UopPerfEvent(CounterName); if (!UopPerfEvent.valid()) llvm::report_fatal_error( - llvm::Twine("invalid perf event ").concat(Counters)); + llvm::Twine("invalid perf event ").concat(Counter)); pfm::Counter Counter(UopPerfEvent); Scratch.clear(); Counter.start(); @@ -278,19 +276,19 @@ }; std::vector Result; - const auto &PfmCounters = SchedModel.getExtraProcessorInfo().PfmCounters; + const llvm::MCPfmCountersInfo &PCI = + State.getSubtargetInfo().getPfmCounters(); // Uops per port. - for (unsigned ProcResIdx = 1; - ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) { - const char *const Counters = PfmCounters.IssueCounters[ProcResIdx]; - if (!Counters) - continue; - const double CounterValue = RunMeasurement(Counters); - Result.push_back(BenchmarkMeasure::Create( - SchedModel.getProcResource(ProcResIdx)->Name, CounterValue)); + for (const auto *IssueCounter = PCI.IssueCounters, + *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters; + IssueCounter != IssueCounterEnd; ++IssueCounter) { + const double CounterValue = + IssueCounter->Counter ? RunMeasurement(IssueCounter->Counter) : 0.0; + Result.push_back( + BenchmarkMeasure::Create(IssueCounter->ProcResName, CounterValue)); } // NumMicroOps. 
- if (const char *const UopsCounter = PfmCounters.UopsCounter) { + if (const char *const UopsCounter = PCI.UopsCounter) { const double CounterValue = RunMeasurement(UopsCounter); Result.push_back(BenchmarkMeasure::Create("NumMicroOps", CounterValue)); } Index: unittests/CodeGen/MachineInstrTest.cpp =================================================================== --- unittests/CodeGen/MachineInstrTest.cpp +++ unittests/CodeGen/MachineInstrTest.cpp @@ -48,7 +48,8 @@ public: BogusSubtarget(TargetMachine &TM) : TargetSubtargetInfo(Triple(""), "", "", {}, {}, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr), + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr), FL(), TL(TM) {} ~BogusSubtarget() override {} Index: utils/TableGen/CodeGenSchedule.h =================================================================== --- utils/TableGen/CodeGenSchedule.h +++ utils/TableGen/CodeGenSchedule.h @@ -239,11 +239,6 @@ // Optional Retire Control Unit definition. Record *RetireControlUnit; - // List of PfmCounters. - RecVec PfmIssueCounterDefs; - Record *PfmCycleCounterDef = nullptr; - Record *PfmUopsCounterDef = nullptr; - CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef, Record *IDef) : Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef), @@ -258,10 +253,7 @@ } bool hasExtraProcessorInfo() const { - return RetireControlUnit || !RegisterFiles.empty() || - !PfmIssueCounterDefs.empty() || - PfmCycleCounterDef != nullptr || - PfmUopsCounterDef != nullptr; + return RetireControlUnit || !RegisterFiles.empty(); } unsigned getProcResourceIdx(Record *PRDef) const; @@ -439,6 +431,9 @@ std::vector STIPredicates; + // Table of counter name -> counter index. 
+ std::map PfmCounterNameTable; + public: CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT); @@ -462,6 +457,10 @@ return make_range(classes_begin(), classes_begin() + NumInstrSchedClasses); } + const std::map &getPfmCounterNameTable() const { + return PfmCounterNameTable; + } + Record *getModelOrItinDef(Record *ProcDef) const { Record *ModelDef = ProcDef->getValueAsDef("SchedModel"); Record *ItinsDef = ProcDef->getValueAsDef("ProcItin"); Index: utils/TableGen/CodeGenSchedule.cpp =================================================================== --- utils/TableGen/CodeGenSchedule.cpp +++ utils/TableGen/CodeGenSchedule.cpp @@ -53,7 +53,7 @@ // (instregex "OpcPat",...) Find all instructions matching an opcode pattern. struct InstRegexOp : public SetTheory::Operator { const CodeGenTarget &Target; - InstRegexOp(const CodeGenTarget &t): Target(t) {} + InstRegexOp(const CodeGenTarget &t) : Target(t) {} /// Remove any text inside of parentheses from S. static std::string removeParens(llvm::StringRef S) { @@ -166,8 +166,8 @@ /// CodeGenModels ctor interprets machine model records and populates maps. CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK, - const CodeGenTarget &TGT): - Records(RK), Target(TGT) { + const CodeGenTarget &TGT) + : Records(RK), Target(TGT) { Sets.addFieldExpander("InstRW", "Instrs"); @@ -350,7 +350,7 @@ unsigned OpcodeIdx = Opcode2Index[Opcode]; if (OpcodeMasks[OpcodeIdx].first[ProcIndex]) { std::string Message = - "Opcode " + Opcode->getName().str() + + "Opcode " + Opcode->getName().str() + " used by multiple InstructionEquivalenceClass definitions."; PrintFatalError(EC->getLoc(), Message); } @@ -365,31 +365,29 @@ // Sort OpcodeMappings elements based on their CPU and predicate masks. // As a last resort, order elements by opcode identifier. 
- llvm::sort(OpcodeMappings, - [&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) { - unsigned LhsIdx = Opcode2Index[Lhs.first]; - unsigned RhsIdx = Opcode2Index[Rhs.first]; - std::pair &LhsMasks = OpcodeMasks[LhsIdx]; - std::pair &RhsMasks = OpcodeMasks[RhsIdx]; - - if (LhsMasks.first != RhsMasks.first) { - if (LhsMasks.first.countPopulation() < - RhsMasks.first.countPopulation()) - return true; - return LhsMasks.first.countLeadingZeros() > - RhsMasks.first.countLeadingZeros(); - } - - if (LhsMasks.second != RhsMasks.second) { - if (LhsMasks.second.countPopulation() < - RhsMasks.second.countPopulation()) - return true; - return LhsMasks.second.countLeadingZeros() > - RhsMasks.second.countLeadingZeros(); - } - - return LhsIdx < RhsIdx; - }); + llvm::sort(OpcodeMappings, [&](const OpcodeMapPair &Lhs, + const OpcodeMapPair &Rhs) { + unsigned LhsIdx = Opcode2Index[Lhs.first]; + unsigned RhsIdx = Opcode2Index[Rhs.first]; + std::pair &LhsMasks = OpcodeMasks[LhsIdx]; + std::pair &RhsMasks = OpcodeMasks[RhsIdx]; + + if (LhsMasks.first != RhsMasks.first) { + if (LhsMasks.first.countPopulation() < RhsMasks.first.countPopulation()) + return true; + return LhsMasks.first.countLeadingZeros() > + RhsMasks.first.countLeadingZeros(); + } + + if (LhsMasks.second != RhsMasks.second) { + if (LhsMasks.second.countPopulation() < RhsMasks.second.countPopulation()) + return true; + return LhsMasks.second.countLeadingZeros() > + RhsMasks.second.countLeadingZeros(); + } + + return LhsIdx < RhsIdx; + }); // Now construct opcode groups. Groups are used by the SubtargetEmitter when // expanding the body of a STIPredicate function. In particular, each opcode @@ -499,7 +497,7 @@ llvm::sort(ProcRecords, LessRecordFieldName()); // Reserve space because we can. Reallocation would be ok. - ProcModels.reserve(ProcRecords.size()+1); + ProcModels.reserve(ProcRecords.size() + 1); // Use idx=0 for NoModel/NoItineraries. 
Record *NoModelDef = Records.getDef("NoSchedModel"); @@ -524,8 +522,7 @@ if (ModelKey->isSubClassOf("SchedMachineModel")) { Record *ItinsDef = ModelKey->getValueAsDef("Itineraries"); ProcModels.emplace_back(ProcModels.size(), Name, ModelKey, ItinsDef); - } - else { + } else { // An itinerary is defined without a machine model. Infer a new model. if (!ModelKey->getValueAsListOfDefs("IID").empty()) Name = Name + "Model"; @@ -537,7 +534,7 @@ // Recursively find all reachable SchedReadWrite records. static void scanSchedRW(Record *RWDef, RecVec &RWDefs, - SmallPtrSet &RWSet) { + SmallPtrSet &RWSet) { if (!RWSet.insert(RWDef).second) return; RWDefs.push_back(RWDef); @@ -546,8 +543,7 @@ RecVec Seq = RWDef->getValueAsListOfDefs("Writes"); for (Record *WSRec : Seq) scanSchedRW(WSRec, RWDefs, RWSet); - } - else if (RWDef->isSubClassOf("SchedVariant")) { + } else if (RWDef->isSubClassOf("SchedVariant")) { // Visit each variant (guarded by a different predicate). RecVec Vars = RWDef->getValueAsListOfDefs("Variants"); for (Record *Variant : Vars) { @@ -566,7 +562,7 @@ SchedWrites.resize(1); SchedReads.resize(1); - SmallPtrSet RWSet; + SmallPtrSet RWSet; // Find all SchedReadWrites referenced by instruction defs. RecVec SWDefs, SRDefs; @@ -623,8 +619,7 @@ if (!AliasDef->isSubClassOf("SchedWrite")) PrintFatalError(ADef->getLoc(), "SchedWrite Alias must be SchedWrite"); scanSchedRW(AliasDef, SWDefs, RWSet); - } - else { + } else { assert(MatchDef->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); if (!AliasDef->isSubClassOf("SchedRead")) PrintFatalError(ADef->getLoc(), "SchedRead Alias must be SchedRead"); @@ -640,7 +635,7 @@ } llvm::sort(SRDefs, LessRecord()); for (Record *SRDef : SRDefs) { - assert(!getSchedRWIdx(SRDef, /*IsRead-*/true) && "duplicate SchedWrite"); + assert(!getSchedRWIdx(SRDef, /*IsRead-*/ true) && "duplicate SchedWrite"); SchedReads.emplace_back(SchedReads.size(), SRDef); } // Initialize WriteSequence vectors. 
@@ -716,8 +711,8 @@ return false; } -static void splitSchedReadWrites(const RecVec &RWDefs, - RecVec &WriteDefs, RecVec &ReadDefs) { +static void splitSchedReadWrites(const RecVec &RWDefs, RecVec &WriteDefs, + RecVec &ReadDefs) { for (Record *RWDef : RWDefs) { if (RWDef->isSubClassOf("SchedWrite")) WriteDefs.push_back(RWDef); @@ -729,8 +724,8 @@ } // Split the SchedReadWrites defs and call findRWs for each list. -void CodeGenSchedModels::findRWs(const RecVec &RWDefs, - IdxVec &Writes, IdxVec &Reads) const { +void CodeGenSchedModels::findRWs(const RecVec &RWDefs, IdxVec &Writes, + IdxVec &Reads) const { RecVec WriteDefs; RecVec ReadDefs; splitSchedReadWrites(RWDefs, WriteDefs, ReadDefs); @@ -755,8 +750,7 @@ RWSeq.push_back(RWIdx); return; } - int Repeat = - SchedRW.TheDef ? SchedRW.TheDef->getValueAsInt("Repeat") : 1; + int Repeat = SchedRW.TheDef ? SchedRW.TheDef->getValueAsInt("Repeat") : 1; for (int i = 0; i < Repeat; ++i) { for (unsigned I : SchedRW.Sequence) { expandRWSequence(I, RWSeq, IsRead); @@ -767,8 +761,8 @@ // Expand a SchedWrite as a sequence following any aliases that coincide with // the given processor model. 
void CodeGenSchedModels::expandRWSeqForProc( - unsigned RWIdx, IdxVec &RWSeq, bool IsRead, - const CodeGenProcModel &ProcModel) const { + unsigned RWIdx, IdxVec &RWSeq, bool IsRead, + const CodeGenProcModel &ProcModel) const { const CodeGenSchedRW &SchedWrite = getSchedRW(RWIdx, IsRead); Record *AliasDef = nullptr; @@ -780,14 +774,16 @@ continue; } if (AliasDef) - PrintFatalError(AliasRW.TheDef->getLoc(), "Multiple aliases " - "defined for processor " + ProcModel.ModelName + - " Ensure only one SchedAlias exists per RW."); + PrintFatalError(AliasRW.TheDef->getLoc(), + "Multiple aliases " + "defined for processor " + + ProcModel.ModelName + + " Ensure only one SchedAlias exists per RW."); AliasDef = AliasRW.TheDef; } if (AliasDef) { - expandRWSeqForProc(getSchedRWIdx(AliasDef, IsRead), - RWSeq, IsRead,ProcModel); + expandRWSeqForProc(getSchedRWIdx(AliasDef, IsRead), RWSeq, IsRead, + ProcModel); return; } if (!SchedWrite.IsSequence) { @@ -795,7 +791,7 @@ return; } int Repeat = - SchedWrite.TheDef ? SchedWrite.TheDef->getValueAsInt("Repeat") : 1; + SchedWrite.TheDef ? SchedWrite.TheDef->getValueAsInt("Repeat") : 1; for (int I = 0, E = Repeat; I < E; ++I) { for (unsigned Idx : SchedWrite.Sequence) { expandRWSeqForProc(Idx, RWSeq, IsRead, ProcModel); @@ -840,8 +836,7 @@ // NoItinerary is always the first class at Idx=0 assert(SchedClasses.empty() && "Expected empty sched class"); - SchedClasses.emplace_back(0, "NoInstrModel", - Records.getDef("NoItinerary")); + SchedClasses.emplace_back(0, "NoInstrModel", Records.getDef("NoItinerary")); SchedClasses.back().ProcIndices.push_back(0); // Create a SchedClass for each unique combination of itinerary class and @@ -853,7 +848,7 @@ findRWs(Inst->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads); // ProcIdx == 0 indicates the class applies to all processors. 
- unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, /*ProcIndices*/{0}); + unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, /*ProcIndices*/ {0}); InstrClassMap[Inst->TheDef] = SCIdx; } // Create classes for InstRW defs. @@ -885,7 +880,8 @@ } CodeGenSchedClass &SC = getSchedClass(SCIdx); if (SC.ProcIndices[0] != 0) - PrintFatalError(Inst->TheDef->getLoc(), "Instruction's sched class " + PrintFatalError(Inst->TheDef->getLoc(), + "Instruction's sched class " "must not be subtarget specific."); IdxVec ProcIndices; @@ -915,8 +911,7 @@ << InstName); IdxVec Writes; IdxVec Reads; - findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), - Writes, Reads); + findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); LLVM_DEBUG({ for (unsigned WIdx : Writes) dbgs() << " " << SchedWrites[WIdx].Name; @@ -985,25 +980,23 @@ assert(!ProcIndices.empty() && "expect at least one ProcIdx"); auto IsKeyEqual = [=](const CodeGenSchedClass &SC) { - return SC.isKeyEqual(ItinClassDef, OperWrites, OperReads); - }; + return SC.isKeyEqual(ItinClassDef, OperWrites, OperReads); + }; auto I = find_if(make_range(schedClassBegin(), schedClassEnd()), IsKeyEqual); unsigned Idx = I == schedClassEnd() ? 
0 : std::distance(schedClassBegin(), I); if (Idx || SchedClasses[0].isKeyEqual(ItinClassDef, OperWrites, OperReads)) { IdxVec PI; std::set_union(SchedClasses[Idx].ProcIndices.begin(), - SchedClasses[Idx].ProcIndices.end(), - ProcIndices.begin(), ProcIndices.end(), - std::back_inserter(PI)); + SchedClasses[Idx].ProcIndices.end(), ProcIndices.begin(), + ProcIndices.end(), std::back_inserter(PI)); SchedClasses[Idx].ProcIndices = std::move(PI); return Idx; } Idx = SchedClasses.size(); - SchedClasses.emplace_back(Idx, - createSchedClassName(ItinClassDef, OperWrites, - OperReads), - ItinClassDef); + SchedClasses.emplace_back( + Idx, createSchedClassName(ItinClassDef, OperWrites, OperReads), + ItinClassDef); CodeGenSchedClass &SC = SchedClasses.back(); SC.Writes = OperWrites; SC.Reads = OperReads; @@ -1036,17 +1029,16 @@ // the Instrs to it. for (auto &Entry : ClassInstrs) { unsigned OldSCIdx = Entry.first; - ArrayRef InstDefs = Entry.second; + ArrayRef InstDefs = Entry.second; // If the all instrs in the current class are accounted for, then leave // them mapped to their old class. 
if (OldSCIdx) { const RecVec &RWDefs = SchedClasses[OldSCIdx].InstRWs; if (!RWDefs.empty()) { const RecVec *OrigInstDefs = Sets.expand(RWDefs[0]); - unsigned OrigNumInstrs = - count_if(*OrigInstDefs, [&](Record *OIDef) { - return InstrClassMap[OIDef] == OldSCIdx; - }); + unsigned OrigNumInstrs = count_if(*OrigInstDefs, [&](Record *OIDef) { + return InstrClassMap[OIDef] == OldSCIdx; + }); if (OrigNumInstrs == InstDefs.size()) { assert(SchedClasses[OldSCIdx].ProcIndices[0] == 0 && "expected a generic SchedClass"); @@ -1057,9 +1049,11 @@ if (RWD->getValueAsDef("SchedModel") == RWModelDef && RWModelDef->getValueAsBit("FullInstRWOverlapCheck")) { for (Record *Inst : InstDefs) { - PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " + - Inst->getName() + " also matches " + - RWD->getValue("Instrs")->getValue()->getAsString()); + PrintFatalError( + InstRWDef->getLoc(), + "Overlapping InstRW def " + Inst->getName() + + " also matches " + + RWD->getValue("Instrs")->getValue()->getAsString()); } } } @@ -1089,13 +1083,14 @@ for (Record *OldRWDef : SchedClasses[OldSCIdx].InstRWs) { if (OldRWDef->getValueAsDef("SchedModel") == RWModelDef) { for (Record *InstDef : InstDefs) { - PrintFatalError(OldRWDef->getLoc(), "Overlapping InstRW def " + - InstDef->getName() + " also matches " + - OldRWDef->getValue("Instrs")->getValue()->getAsString()); + PrintFatalError( + OldRWDef->getLoc(), + "Overlapping InstRW def " + InstDef->getName() + + " also matches " + + OldRWDef->getValue("Instrs")->getValue()->getAsString()); } } - assert(OldRWDef != InstRWDef && - "SchedClass has duplicate InstRW def"); + assert(OldRWDef != InstRWDef && "SchedClass has duplicate InstRW def"); SC.InstRWs.push_back(OldRWDef); } } @@ -1108,7 +1103,8 @@ // True if collectProcItins found anything. 
bool CodeGenSchedModels::hasItineraries() const { - for (const CodeGenProcModel &PM : make_range(procModelBegin(),procModelEnd())) + for (const CodeGenProcModel &PM : + make_range(procModelBegin(), procModelEnd())) if (PM.hasItineraries()) return true; return false; @@ -1163,14 +1159,14 @@ void CodeGenSchedModels::collectProcItinRW() { RecVec ItinRWDefs = Records.getAllDerivedDefinitions("ItinRW"); llvm::sort(ItinRWDefs, LessRecord()); - for (Record *RWDef : ItinRWDefs) { + for (Record *RWDef : ItinRWDefs) { if (!RWDef->getValueInit("SchedModel")->isComplete()) PrintFatalError(RWDef->getLoc(), "SchedModel is undefined"); Record *ModelDef = RWDef->getValueAsDef("SchedModel"); ProcModelMapTy::const_iterator I = ProcModelMap.find(ModelDef); if (I == ProcModelMap.end()) { - PrintFatalError(RWDef->getLoc(), "Undefined SchedMachineModel " - + ModelDef->getName()); + PrintFatalError(RWDef->getLoc(), + "Undefined SchedMachineModel " + ModelDef->getName()); } ProcModels[I->second].ItinRWDefs.push_back(RWDef); } @@ -1179,8 +1175,9 @@ // Gather the unsupported features for processor models. 
void CodeGenSchedModels::collectProcUnsupportedFeatures() { for (CodeGenProcModel &ProcModel : ProcModels) { - for (Record *Pred : ProcModel.ModelDef->getValueAsListOfDefs("UnsupportedFeatures")) { - ProcModel.UnsupportedFeaturesDefs.push_back(Pred); + for (Record *Pred : + ProcModel.ModelDef->getValueAsListOfDefs("UnsupportedFeatures")) { + ProcModel.UnsupportedFeaturesDefs.push_back(Pred); } } } @@ -1201,10 +1198,10 @@ if (!SchedClasses[Idx].InstRWs.empty()) inferFromInstRWs(Idx); if (!SchedClasses[Idx].Writes.empty()) { - inferFromRW(SchedClasses[Idx].Writes, SchedClasses[Idx].Reads, - Idx, SchedClasses[Idx].ProcIndices); + inferFromRW(SchedClasses[Idx].Writes, SchedClasses[Idx].Reads, Idx, + SchedClasses[Idx].ProcIndices); } - assert(SchedClasses.size() < (NumInstrSchedClasses*6) && + assert(SchedClasses.size() < (NumInstrSchedClasses * 6) && "too many SchedVariants"); } } @@ -1221,9 +1218,9 @@ if (!std::count(Matched.begin(), Matched.end(), ItinClassDef)) continue; if (HasMatch) - PrintFatalError(Rec->getLoc(), "Duplicate itinerary class " - + ItinClassDef->getName() - + " in ItinResources for " + PM.ModelName); + PrintFatalError(Rec->getLoc(), + "Duplicate itinerary class " + ItinClassDef->getName() + + " in ItinResources for " + PM.ModelName); HasMatch = true; IdxVec Writes, Reads; findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); @@ -1263,8 +1260,8 @@ unsigned ProcIdx; // Processor model index or zero for any. unsigned TransVecIdx; // Index into PredTransitions::TransVec. - TransVariant(Record *def, unsigned rwi, unsigned pi, unsigned ti): - VarOrSeqDef(def), RWIdx(rwi), ProcIdx(pi), TransVecIdx(ti) {} + TransVariant(Record *def, unsigned rwi, unsigned pi, unsigned ti) + : VarOrSeqDef(def), RWIdx(rwi), ProcIdx(pi), TransVecIdx(ti) {} }; // Associate a predicate with the SchedReadWrite that it guards. 
@@ -1274,15 +1271,16 @@ unsigned RWIdx; Record *Predicate; - PredCheck(bool r, unsigned w, Record *p): IsRead(r), RWIdx(w), Predicate(p) {} + PredCheck(bool r, unsigned w, Record *p) + : IsRead(r), RWIdx(w), Predicate(p) {} }; // A Predicate transition is a list of RW sequences guarded by a PredTerm. struct PredTransition { // A predicate term is a conjunction of PredChecks. SmallVector PredTerm; - SmallVector, 16> WriteSequences; - SmallVector, 16> ReadSequences; + SmallVector, 16> WriteSequences; + SmallVector, 16> ReadSequences; SmallVector ProcIndices; }; @@ -1294,7 +1292,7 @@ public: std::vector TransVec; - PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {} + PredTransitions(CodeGenSchedModels &sm) : SchedModels(sm) {} void substituteVariantOperand(const SmallVectorImpl &RWSeq, bool IsRead, unsigned StartIdx); @@ -1307,9 +1305,8 @@ private: bool mutuallyExclusive(Record *PredDef, ArrayRef Term); - void getIntersectingVariants( - const CodeGenSchedRW &SchedRW, unsigned TransIdx, - std::vector &IntersectingVariants); + void getIntersectingVariants(const CodeGenSchedRW &SchedRW, unsigned TransIdx, + std::vector &IntersectingVariants); void pushVariant(const TransVariant &VInfo, bool IsRead); }; @@ -1326,7 +1323,7 @@ // conditions implicitly negate any prior condition. bool PredTransitions::mutuallyExclusive(Record *PredDef, ArrayRef Term) { - for (const PredCheck &PC: Term) { + for (const PredCheck &PC : Term) { if (PC.Predicate == PredDef) return false; @@ -1348,7 +1345,7 @@ for (Record *Alias : RW.Aliases) { const CodeGenSchedRW &AliasRW = - SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW")); + SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW")); if (AliasRW.HasVariants) return true; if (AliasRW.IsSequence) { @@ -1384,8 +1381,8 @@ // given SchedRW whose processor indices and predicates are not mutually // exclusive with the given transition. 
void PredTransitions::getIntersectingVariants( - const CodeGenSchedRW &SchedRW, unsigned TransIdx, - std::vector &IntersectingVariants) { + const CodeGenSchedRW &SchedRW, unsigned TransIdx, + std::vector &IntersectingVariants) { bool GenericRW = false; @@ -1414,7 +1411,7 @@ AliasProcIdx = SchedModels.getProcModel(ModelDef).Index; } const CodeGenSchedRW &AliasRW = - SchedModels.getSchedRW((*AI)->getValueAsDef("AliasRW")); + SchedModels.getSchedRW((*AI)->getValueAsDef("AliasRW")); if (AliasRW.HasVariants) { const RecVec VarDefs = AliasRW.TheDef->getValueAsListOfDefs("Variants"); @@ -1431,17 +1428,17 @@ // A zero processor index means any processor. SmallVectorImpl &ProcIndices = TransVec[TransIdx].ProcIndices; if (ProcIndices[0] && Variant.ProcIdx) { - unsigned Cnt = std::count(ProcIndices.begin(), ProcIndices.end(), - Variant.ProcIdx); + unsigned Cnt = + std::count(ProcIndices.begin(), ProcIndices.end(), Variant.ProcIdx); if (!Cnt) continue; if (Cnt > 1) { const CodeGenProcModel &PM = - *(SchedModels.procModelBegin() + Variant.ProcIdx); + *(SchedModels.procModelBegin() + Variant.ProcIdx); PrintFatalError(Variant.VarOrSeqDef->getLoc(), "Multiple variants defined for processor " + - PM.ModelName + - " Ensure only one SchedAlias exists per RW."); + PM.ModelName + + " Ensure only one SchedAlias exists per RW."); } } if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) { @@ -1453,8 +1450,7 @@ // The first variant builds on the existing transition. Variant.TransVecIdx = TransIdx; IntersectingVariants.push_back(Variant); - } - else { + } else { // Push another copy of the current transition for more variants. 
Variant.TransVecIdx = TransVec.size(); IntersectingVariants.push_back(Variant); @@ -1462,15 +1458,15 @@ } } if (GenericRW && IntersectingVariants.empty()) { - PrintFatalError(SchedRW.TheDef->getLoc(), "No variant of this type has " + PrintFatalError(SchedRW.TheDef->getLoc(), + "No variant of this type has " "a matching predicate on any processor"); } } // Push the Reads/Writes selected by this variant onto the PredTransition // specified by VInfo. -void PredTransitions:: -pushVariant(const TransVariant &VInfo, bool IsRead) { +void PredTransitions::pushVariant(const TransVariant &VInfo, bool IsRead) { PredTransition &Trans = TransVec[VInfo.TransVecIdx]; // If this operand transition is reached through a processor-specific alias, @@ -1481,11 +1477,10 @@ IdxVec SelectedRWs; if (VInfo.VarOrSeqDef->isSubClassOf("SchedVar")) { Record *PredDef = VInfo.VarOrSeqDef->getValueAsDef("Predicate"); - Trans.PredTerm.emplace_back(IsRead, VInfo.RWIdx,PredDef); + Trans.PredTerm.emplace_back(IsRead, VInfo.RWIdx, PredDef); RecVec SelectedDefs = VInfo.VarOrSeqDef->getValueAsListOfDefs("Selected"); SchedModels.findRWs(SelectedDefs, SelectedRWs, IsRead); - } - else { + } else { assert(VInfo.VarOrSeqDef->isSubClassOf("WriteSequence") && "variant must be a SchedVariant or aliased WriteSequence"); SelectedRWs.push_back(SchedModels.getSchedRWIdx(VInfo.VarOrSeqDef, IsRead)); @@ -1493,10 +1488,10 @@ const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(VInfo.RWIdx, IsRead); - SmallVectorImpl> &RWSequences = IsRead - ? Trans.ReadSequences : Trans.WriteSequences; + SmallVectorImpl> &RWSequences = + IsRead ? Trans.ReadSequences : Trans.WriteSequences; if (SchedRW.IsVariadic) { - unsigned OperIdx = RWSequences.size()-1; + unsigned OperIdx = RWSequences.size() - 1; // Make N-1 copies of this transition's last sequence. RWSequences.insert(RWSequences.end(), SelectedRWs.size() - 1, RWSequences[OperIdx]); @@ -1504,8 +1499,8 @@ // sequence (split the current operand into N operands). 
// Note that write sequences should be expanded within this loop--the entire // sequence belongs to a single operand. - for (IdxIter RWI = SelectedRWs.begin(), RWE = SelectedRWs.end(); - RWI != RWE; ++RWI, ++OperIdx) { + for (IdxIter RWI = SelectedRWs.begin(), RWE = SelectedRWs.end(); RWI != RWE; + ++RWI, ++OperIdx) { IdxVec ExpandedRWs; if (IsRead) ExpandedRWs.push_back(*RWI); @@ -1515,14 +1510,13 @@ ExpandedRWs.begin(), ExpandedRWs.end()); } assert(OperIdx == RWSequences.size() && "missed a sequence"); - } - else { + } else { // Push this transition's expanded sequence onto this transition's last // sequence (add to the current operand's sequence). SmallVectorImpl &Seq = RWSequences.back(); IdxVec ExpandedRWs; - for (IdxIter RWI = SelectedRWs.begin(), RWE = SelectedRWs.end(); - RWI != RWE; ++RWI) { + for (IdxIter RWI = SelectedRWs.begin(), RWE = SelectedRWs.end(); RWI != RWE; + ++RWI) { if (IsRead) ExpandedRWs.push_back(*RWI); else @@ -1537,11 +1531,12 @@ // starts. RWSeq must be applied to all transitions between StartIdx and the end // of TransVec. void PredTransitions::substituteVariantOperand( - const SmallVectorImpl &RWSeq, bool IsRead, unsigned StartIdx) { + const SmallVectorImpl &RWSeq, bool IsRead, unsigned StartIdx) { // Visit each original RW within the current sequence. - for (SmallVectorImpl::const_iterator - RWI = RWSeq.begin(), RWE = RWSeq.end(); RWI != RWE; ++RWI) { + for (SmallVectorImpl::const_iterator RWI = RWSeq.begin(), + RWE = RWSeq.end(); + RWI != RWE; ++RWI) { const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(*RWI, IsRead); // Push this RW on all partial PredTransitions or distribute variants. // New PredTransitions may be pushed within this loop which should not be @@ -1562,8 +1557,8 @@ getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants); // Now expand each variant on top of its copy of the transition. 
for (std::vector::const_iterator - IVI = IntersectingVariants.begin(), - IVE = IntersectingVariants.end(); + IVI = IntersectingVariants.begin(), + IVE = IntersectingVariants.end(); IVI != IVE; ++IVI) { pushVariant(*IVI, IsRead); } @@ -1586,23 +1581,27 @@ TransVec.back().ProcIndices = Trans.ProcIndices; // Visit each original write sequence. - for (SmallVectorImpl>::const_iterator - WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end(); + for (SmallVectorImpl>::const_iterator + WSI = Trans.WriteSequences.begin(), + WSE = Trans.WriteSequences.end(); WSI != WSE; ++WSI) { // Push a new (empty) write sequence onto all partial Transitions. - for (std::vector::iterator I = - TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) { + for (std::vector::iterator I = TransVec.begin() + StartIdx, + E = TransVec.end(); + I != E; ++I) { I->WriteSequences.emplace_back(); } substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx); } // Visit each original read sequence. - for (SmallVectorImpl>::const_iterator - RSI = Trans.ReadSequences.begin(), RSE = Trans.ReadSequences.end(); + for (SmallVectorImpl>::const_iterator + RSI = Trans.ReadSequences.begin(), + RSE = Trans.ReadSequences.end(); RSI != RSE; ++RSI) { // Push a new (empty) read sequence onto all partial Transitions. - for (std::vector::iterator I = - TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) { + for (std::vector::iterator I = TransVec.begin() + StartIdx, + E = TransVec.end(); + I != E; ++I) { I->ReadSequences.emplace_back(); } substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx); @@ -1615,8 +1614,9 @@ CodeGenSchedModels &SchedModels) { // For each PredTransition, create a new CodeGenSchedTransition, which usually // requires creating a new SchedClass. 
- for (ArrayRef::iterator - I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) { + for (ArrayRef::iterator I = LastTransitions.begin(), + E = LastTransitions.end(); + I != E; ++I) { IdxVec OperWritesVariant; transform(I->WriteSequences, std::back_inserter(OperWritesVariant), [&SchedModels](ArrayRef WS) { @@ -1629,15 +1629,13 @@ }); CodeGenSchedTransition SCTrans; SCTrans.ToClassIdx = - SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant, - OperReadsVariant, I->ProcIndices); + SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant, + OperReadsVariant, I->ProcIndices); SCTrans.ProcIndices.assign(I->ProcIndices.begin(), I->ProcIndices.end()); // The final PredTerm is unique set of predicates guarding the transition. RecVec Preds; transform(I->PredTerm, std::back_inserter(Preds), - [](const PredCheck &P) { - return P.Predicate; - }); + [](const PredCheck &P) { return P.Predicate; }); Preds.erase(std::unique(Preds.begin(), Preds.end()), Preds.end()); SCTrans.PredTerm = std::move(Preds); SchedModels.getSchedClass(FromClassIdx) @@ -1706,9 +1704,9 @@ if (!PM.ProcResourceDefs[i]->isSubClassOf("ProcResGroup")) continue; RecVec SuperUnits = - PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources"); + PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources"); RecIter RI = SubUnits.begin(), RE = SubUnits.end(); - for ( ; RI != RE; ++RI) { + for (; RI != RE; ++RI) { if (!is_contained(SuperUnits, *RI)) { break; } @@ -1725,23 +1723,23 @@ if (!PM.ProcResourceDefs[i]->isSubClassOf("ProcResGroup")) continue; RecVec CheckUnits = - PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources"); - for (unsigned j = i+1; j < e; ++j) { + PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources"); + for (unsigned j = i + 1; j < e; ++j) { if (!PM.ProcResourceDefs[j]->isSubClassOf("ProcResGroup")) continue; RecVec OtherUnits = - PM.ProcResourceDefs[j]->getValueAsListOfDefs("Resources"); + 
PM.ProcResourceDefs[j]->getValueAsListOfDefs("Resources"); if (std::find_first_of(CheckUnits.begin(), CheckUnits.end(), - OtherUnits.begin(), OtherUnits.end()) - != CheckUnits.end()) { + OtherUnits.begin(), + OtherUnits.end()) != CheckUnits.end()) { // CheckUnits and OtherUnits overlap OtherUnits.insert(OtherUnits.end(), CheckUnits.begin(), CheckUnits.end()); if (!hasSuperGroup(OtherUnits, PM)) { PrintFatalError((PM.ProcResourceDefs[i])->getLoc(), - "proc resource group overlaps with " - + PM.ProcResourceDefs[j]->getName() - + " but no supergroup contains both."); + "proc resource group overlaps with " + + PM.ProcResourceDefs[j]->getName() + + " but no supergroup contains both."); } } } @@ -1757,7 +1755,7 @@ // For each register file definition, construct a CodeGenRegisterFile object // and add it to the appropriate scheduling model. CodeGenProcModel &PM = getProcModel(RF->getValueAsDef("SchedModel")); - PM.RegisterFiles.emplace_back(CodeGenRegisterFile(RF->getName(),RF)); + PM.RegisterFiles.emplace_back(CodeGenRegisterFile(RF->getName(), RF)); CodeGenRegisterFile &CGRF = PM.RegisterFiles.back(); // Now set the number of physical registers as well as the cost of registers @@ -1772,30 +1770,33 @@ } } -// Collect all the RegisterFile definitions available in this target. +// Collect all the ProcPfmCounters definitions available in this target. 
void CodeGenSchedModels::collectPfmCounters() { - for (Record *Def : Records.getAllDerivedDefinitions("PfmIssueCounter")) { - CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); - PM.PfmIssueCounterDefs.emplace_back(Def); - } - for (Record *Def : Records.getAllDerivedDefinitions("PfmCycleCounter")) { - CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); - if (PM.PfmCycleCounterDef) { - PrintFatalError(Def->getLoc(), - "multiple cycle counters for " + - Def->getValueAsDef("SchedModel")->getName()); - } - PM.PfmCycleCounterDef = Def; - } - for (Record *Def : Records.getAllDerivedDefinitions("PfmUopsCounter")) { - CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel")); - if (PM.PfmUopsCounterDef) { - PrintFatalError(Def->getLoc(), - "multiple uops counters for " + - Def->getValueAsDef("SchedModel")->getName()); - } - PM.PfmUopsCounterDef = Def; - } + const auto AddPfmCounterName = [this](const Record *PfmCounterDef) { + const llvm::StringRef Counter = PfmCounterDef->getValueAsString("Counter"); + if (!Counter.empty()) + PfmCounterNameTable.emplace(Counter, 0); + }; + for (Record *Def : Records.getAllDerivedDefinitions("ProcPfmCounters")) { + // Check that ResourceNames are unique. + llvm::SmallSet Seen; + for (const Record *IssueCounter : + Def->getValueAsListOfDefs("IssueCounters")) { + const llvm::StringRef ResourceName = + IssueCounter->getValueAsString("ResourceName"); + if (ResourceName.empty()) + PrintFatalError(IssueCounter->getLoc(), "invalid empty ResourceName"); + if (!Seen.insert(ResourceName).second) + PrintFatalError(IssueCounter->getLoc(), + "duplicate ResourceName " + ResourceName); + AddPfmCounterName(IssueCounter); + } + AddPfmCounterName(Def->getValueAsDef("CycleCounter")); + AddPfmCounterName(Def->getValueAsDef("UopsCounter")); + } + unsigned Index = 0; + for (auto &NameAndIndex : PfmCounterNameTable) + NameAndIndex.second = Index++; } // Collect and sort WriteRes, ReadAdvance, and ProcResources. 
@@ -1916,8 +1917,8 @@ unsigned SCIdx = getSchedClassIdx(*Inst); if (!SCIdx) { if (Inst->TheDef->isValueUnset("SchedRW") && !HadCompleteModel) { - PrintError("No schedule information for instruction '" - + Inst->TheDef->getName() + "'"); + PrintError("No schedule information for instruction '" + + Inst->TheDef->getName() + "'"); Complete = false; } continue; @@ -1943,14 +1944,18 @@ HadCompleteModel = true; } if (!Complete) { - errs() << "\n\nIncomplete schedule models found.\n" - << "- Consider setting 'CompleteModel = 0' while developing new models.\n" - << "- Pseudo instructions can be marked with 'hasNoSchedulingInfo = 1'.\n" - << "- Instructions should usually have Sched<[...]> as a superclass, " - "you may temporarily use an empty list.\n" - << "- Instructions related to unsupported features can be excluded with " - "list UnsupportedFeatures = [HasA,..,HasY]; in the " - "processor model.\n\n"; + errs() + << "\n\nIncomplete schedule models found.\n" + << "- Consider setting 'CompleteModel = 0' while developing new " + "models.\n" + << "- Pseudo instructions can be marked with 'hasNoSchedulingInfo = " + "1'.\n" + << "- Instructions should usually have Sched<[...]> as a superclass, " + "you may temporarily use an empty list.\n" + << "- Instructions related to unsupported features can be excluded " + "with " + "list UnsupportedFeatures = [HasA,..,HasY]; in the " + "processor model.\n\n"; PrintFatalError("Incomplete schedule model"); } } @@ -1961,15 +1966,15 @@ const CodeGenProcModel &PM = ProcModels[PIdx]; // For all ItinRW entries. 
bool HasMatch = false; - for (RecIter II = PM.ItinRWDefs.begin(), IE = PM.ItinRWDefs.end(); - II != IE; ++II) { + for (RecIter II = PM.ItinRWDefs.begin(), IE = PM.ItinRWDefs.end(); II != IE; + ++II) { RecVec Matched = (*II)->getValueAsListOfDefs("MatchedItinClasses"); if (!std::count(Matched.begin(), Matched.end(), ItinClassDef)) continue; if (HasMatch) - PrintFatalError((*II)->getLoc(), "Duplicate itinerary class " - + ItinClassDef->getName() - + " in ItinResources for " + PM.ModelName); + PrintFatalError((*II)->getLoc(), + "Duplicate itinerary class " + ItinClassDef->getName() + + " in ItinResources for " + PM.ModelName); HasMatch = true; IdxVec Writes, Reads; findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); @@ -1985,8 +1990,7 @@ if (!IsRead && SchedRW.TheDef->isSubClassOf("SchedWriteRes")) { for (unsigned Idx : ProcIndices) addWriteRes(SchedRW.TheDef, Idx); - } - else if (IsRead && SchedRW.TheDef->isSubClassOf("SchedReadAdvance")) { + } else if (IsRead && SchedRW.TheDef->isSubClassOf("SchedReadAdvance")) { for (unsigned Idx : ProcIndices) addReadAdvance(SchedRW.TheDef, Idx); } @@ -1996,17 +2000,16 @@ IdxVec AliasProcIndices; if ((*AI)->getValueInit("SchedModel")->isComplete()) { AliasProcIndices.push_back( - getProcModel((*AI)->getValueAsDef("SchedModel")).Index); - } - else + getProcModel((*AI)->getValueAsDef("SchedModel")).Index); + } else AliasProcIndices = ProcIndices; const CodeGenSchedRW &AliasRW = getSchedRW((*AI)->getValueAsDef("AliasRW")); assert(AliasRW.IsRead == IsRead && "cannot alias reads to writes"); IdxVec ExpandedRWs; expandRWSequence(AliasRW.Index, ExpandedRWs, IsRead); - for (IdxIter SI = ExpandedRWs.begin(), SE = ExpandedRWs.end(); - SI != SE; ++SI) { + for (IdxIter SI = ExpandedRWs.begin(), SE = ExpandedRWs.end(); SI != SE; + ++SI) { collectRWResources(*SI, IsRead, AliasProcIndices); } } @@ -2035,31 +2038,30 @@ assert(!ProcResGroups.empty()); for (Record *ProcResDef : ProcResourceDefs) { - if 
(ProcResDef->getValueAsDef("Kind") == ProcResKind - && ProcResDef->getValueAsDef("SchedModel") == PM.ModelDef) { + if (ProcResDef->getValueAsDef("Kind") == ProcResKind && + ProcResDef->getValueAsDef("SchedModel") == PM.ModelDef) { if (ProcUnitDef) { PrintFatalError(Loc, - "Multiple ProcessorResourceUnits associated with " - + ProcResKind->getName()); + "Multiple ProcessorResourceUnits associated with " + + ProcResKind->getName()); } ProcUnitDef = ProcResDef; } } for (Record *ProcResGroup : ProcResGroups) { - if (ProcResGroup == ProcResKind - && ProcResGroup->getValueAsDef("SchedModel") == PM.ModelDef) { + if (ProcResGroup == ProcResKind && + ProcResGroup->getValueAsDef("SchedModel") == PM.ModelDef) { if (ProcUnitDef) { PrintFatalError(Loc, - "Multiple ProcessorResourceUnits associated with " - + ProcResKind->getName()); + "Multiple ProcessorResourceUnits associated with " + + ProcResKind->getName()); } ProcUnitDef = ProcResGroup; } } if (!ProcUnitDef) { - PrintFatalError(Loc, - "No ProcessorResources associated with " - + ProcResKind->getName()); + PrintFatalError(Loc, "No ProcessorResources associated with " + + ProcResKind->getName()); } return ProcUnitDef; } @@ -2116,14 +2118,16 @@ RecIter PRPos = find(ProcResourceDefs, PRDef); if (PRPos == ProcResourceDefs.end()) PrintFatalError(PRDef->getLoc(), "ProcResource def is not included in " - "the ProcResources list for " + ModelName); + "the ProcResources list for " + + ModelName); // Idx=0 is reserved for invalid. 
return 1 + (PRPos - ProcResourceDefs.begin()); } bool CodeGenProcModel::isUnsupported(const CodeGenInstruction &Inst) const { for (const Record *TheDef : UnsupportedFeaturesDefs) { - for (const Record *PredDef : Inst.TheDef->getValueAsListOfDefs("Predicates")) { + for (const Record *PredDef : + Inst.TheDef->getValueAsListOfDefs("Predicates")) { if (TheDef->getName() == PredDef->getName()) return true; } @@ -2147,12 +2151,11 @@ } } -void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const { - dbgs() << "SCHEDCLASS " << Index << ":" << Name << '\n' - << " Writes: "; +void CodeGenSchedClass::dump(const CodeGenSchedModels *SchedModels) const { + dbgs() << "SCHEDCLASS " << Index << ":" << Name << '\n' << " Writes: "; for (unsigned i = 0, N = Writes.size(); i < N; ++i) { SchedModels->getSchedWrite(Writes[i]).dump(); - if (i < N-1) { + if (i < N - 1) { dbgs() << '\n'; dbgs().indent(10); } @@ -2160,12 +2163,14 @@ dbgs() << "\n Reads: "; for (unsigned i = 0, N = Reads.size(); i < N; ++i) { SchedModels->getSchedRead(Reads[i]).dump(); - if (i < N-1) { + if (i < N - 1) { dbgs() << '\n'; dbgs().indent(10); } } - dbgs() << "\n ProcIdx: "; dumpIdxVec(ProcIndices); dbgs() << '\n'; + dbgs() << "\n ProcIdx: "; + dumpIdxVec(ProcIndices); + dbgs() << '\n'; if (!Transitions.empty()) { dbgs() << "\n Transitions for Proc "; for (const CodeGenSchedTransition &Transition : Transitions) { @@ -2176,24 +2181,27 @@ void PredTransitions::dump() const { dbgs() << "Expanded Variants:\n"; - for (std::vector::const_iterator - TI = TransVec.begin(), TE = TransVec.end(); TI != TE; ++TI) { + for (std::vector::const_iterator TI = TransVec.begin(), + TE = TransVec.end(); + TI != TE; ++TI) { dbgs() << "{"; - for (SmallVectorImpl::const_iterator - PCI = TI->PredTerm.begin(), PCE = TI->PredTerm.end(); + for (SmallVectorImpl::const_iterator PCI = TI->PredTerm.begin(), + PCE = TI->PredTerm.end(); PCI != PCE; ++PCI) { if (PCI != TI->PredTerm.begin()) dbgs() << ", "; - dbgs() << 
SchedModels.getSchedRW(PCI->RWIdx, PCI->IsRead).Name - << ":" << PCI->Predicate->getName(); + dbgs() << SchedModels.getSchedRW(PCI->RWIdx, PCI->IsRead).Name << ":" + << PCI->Predicate->getName(); } dbgs() << "},\n => {"; - for (SmallVectorImpl>::const_iterator - WSI = TI->WriteSequences.begin(), WSE = TI->WriteSequences.end(); + for (SmallVectorImpl>::const_iterator + WSI = TI->WriteSequences.begin(), + WSE = TI->WriteSequences.end(); WSI != WSE; ++WSI) { dbgs() << "("; - for (SmallVectorImpl::const_iterator - WI = WSI->begin(), WE = WSI->end(); WI != WE; ++WI) { + for (SmallVectorImpl::const_iterator WI = WSI->begin(), + WE = WSI->end(); + WI != WE; ++WI) { if (WI != WSI->begin()) dbgs() << ", "; dbgs() << SchedModels.getSchedWrite(*WI).Name; Index: utils/TableGen/SubtargetEmitter.cpp =================================================================== --- utils/TableGen/SubtargetEmitter.cpp +++ utils/TableGen/SubtargetEmitter.cpp @@ -95,6 +95,7 @@ raw_ostream &OS); void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS); + void EmitPfmCounters(raw_ostream &OS); void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name, char Separator); void EmitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel, @@ -693,80 +694,12 @@ return CostTblIndex; } -static bool EmitPfmIssueCountersTable(const CodeGenProcModel &ProcModel, - raw_ostream &OS) { - unsigned NumCounterDefs = 1 + ProcModel.ProcResourceDefs.size(); - std::vector CounterDefs(NumCounterDefs); - bool HasCounters = false; - for (const Record *CounterDef : ProcModel.PfmIssueCounterDefs) { - const Record *&CD = CounterDefs[ProcModel.getProcResourceIdx( - CounterDef->getValueAsDef("Resource"))]; - if (CD) { - PrintFatalError(CounterDef->getLoc(), - "multiple issue counters for " + - CounterDef->getValueAsDef("Resource")->getName()); - } - CD = CounterDef; - HasCounters = true; - } - if (!HasCounters) { - return false; - } - OS << "\nstatic const char* " << 
ProcModel.ModelName - << "PfmIssueCounters[] = {\n"; - for (unsigned i = 0; i != NumCounterDefs; ++i) { - const Record *CounterDef = CounterDefs[i]; - if (CounterDef) { - const auto PfmCounters = CounterDef->getValueAsListOfStrings("Counters"); - if (PfmCounters.empty()) - PrintFatalError(CounterDef->getLoc(), "empty counter list"); - OS << " \"" << PfmCounters[0]; - for (unsigned p = 1, e = PfmCounters.size(); p != e; ++p) - OS << ",\" \"" << PfmCounters[p]; - OS << "\", // #" << i << " = "; - OS << CounterDef->getValueAsDef("Resource")->getName() << "\n"; - } else { - OS << " nullptr, // #" << i << "\n"; - } - } - OS << "};\n"; - return true; -} - -static void EmitPfmCounters(const CodeGenProcModel &ProcModel, - const bool HasPfmIssueCounters, raw_ostream &OS) { - OS << " {\n"; - // Emit the cycle counter. - if (ProcModel.PfmCycleCounterDef) - OS << " \"" << ProcModel.PfmCycleCounterDef->getValueAsString("Counter") - << "\", // Cycle counter.\n"; - else - OS << " nullptr, // No cycle counter.\n"; - - // Emit the uops counter. - if (ProcModel.PfmUopsCounterDef) - OS << " \"" << ProcModel.PfmUopsCounterDef->getValueAsString("Counter") - << "\", // Uops counter.\n"; - else - OS << " nullptr, // No uops counter.\n"; - - // Emit a reference to issue counters table. - if (HasPfmIssueCounters) - OS << " " << ProcModel.ModelName << "PfmIssueCounters\n"; - else - OS << " nullptr // No issue counters.\n"; - OS << " }\n"; -} - void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS) { // Generate a table of register file descriptors (one entry per each user // defined register file), and a table of register costs. unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS); - // Generate a table of ProcRes counter names. - const bool HasPfmIssueCounters = EmitPfmIssueCountersTable(ProcModel, OS); - // Now generate a table for the extra processor info. 
OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName << "ExtraInfo = {\n "; @@ -779,9 +712,83 @@ EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(), NumCostEntries, OS); - EmitPfmCounters(ProcModel, HasPfmIssueCounters, OS); + OS << "};\n"; +} + +static void +EmitPfmCountersInfo(const Record &Def, + std::map PfmCounterNameTable, + llvm::StringRef Target, unsigned &IssueCountersTableOffset, + raw_ostream &OS) { + const auto CycleCounter = + Def.getValueAsDef("CycleCounter")->getValueAsString("Counter"); + const auto UopsCounter = + Def.getValueAsDef("UopsCounter")->getValueAsString("Counter"); + const size_t NumIssueCounters = + Def.getValueAsListOfDefs("IssueCounters").size(); + + // This is the default, do not emit. + if (CycleCounter.empty() && UopsCounter.empty() && NumIssueCounters == 0) + return; + + OS << "\nstatic const llvm::MCPfmCountersInfo " << Target << Def.getName() + << " = {\n"; + + // Cycle Counter. + if (CycleCounter.empty()) + OS << " nullptr, // No cycle counter.\n"; + else + OS << " " << Target << "PfmCounterNames[" + << PfmCounterNameTable[CycleCounter] << "], // Cycle counter\n"; + + // Uops Counter. + if (UopsCounter.empty()) + OS << " nullptr, // No uops counter.\n"; + else + OS << " " << Target << "PfmCounterNames[" + << PfmCounterNameTable[UopsCounter] << "], // Uops counter\n"; + + // Issue Counters + if (NumIssueCounters == 0) + OS << " nullptr, // No issue counters.\n 0\n"; + else + OS << " " << Target << "PfmIssueCounters + " << IssueCountersTableOffset + << ", " << NumIssueCounters << " // Issue counters.\n"; OS << "};\n"; + IssueCountersTableOffset += NumIssueCounters; +} + +void SubtargetEmitter::EmitPfmCounters(raw_ostream &OS) { + // Emit the counter name table. 
+ const auto &PfmCounterNameTable = SchedModels.getPfmCounterNameTable(); + OS << "\nstatic const char* " << Target << "PfmCounterNames[] = {\n"; + for (const auto &NameAndIndex : PfmCounterNameTable) + OS << " \"" << NameAndIndex.first << "\", // " << NameAndIndex.second + << "\n"; + OS << "};\n\n"; + + // Emit the IssueCounters table. + const RecVec PfmCounterDefs = + Records.getAllDerivedDefinitions("ProcPfmCounters"); + OS << "static const llvm::MCPfmCountersInfo::IssueCounter " << Target + << "PfmIssueCounters[] = {\n"; + for (const Record *Def : PfmCounterDefs) { + for (const Record *ICDef : Def->getValueAsListOfDefs("IssueCounters")) + OS << " { " << Target << "PfmCounterNames[" + << PfmCounterNameTable.find(ICDef->getValueAsString("Counter"))->second + << "], \"" << ICDef->getValueAsString("ResourceName") << "\"},\n"; + } + + OS << "};\n"; + + // Now generate the MCPfmCountersInfos. + unsigned IssueCountersTableOffset = 0; + for (const Record *Def : PfmCounterDefs) + EmitPfmCountersInfo(*Def, PfmCounterNameTable, Target, + IssueCountersTableOffset, OS); + + OS << "\n"; } void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, @@ -1406,7 +1413,8 @@ } // -// EmitProcessorLookup - generate cpu name to itinerary lookup table. +// EmitProcessorLookup - generate cpu name to sched model and cpu name to pfm +// counters lookup tables. 
// void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) { // Gather and sort processor information @@ -1414,12 +1422,11 @@ Records.getAllDerivedDefinitions("Processor"); llvm::sort(ProcessorList, LessRecordFieldName()); - // Begin processor table + // Begin processor->sched model table OS << "\n"; - OS << "// Sorted (by key) array of itineraries for CPU subtype.\n" - << "extern const llvm::SubtargetInfoKV " - << Target << "ProcSchedKV[] = {\n"; - + OS << "// Sorted (by key) array of sched model for CPU subtype.\n" + << "extern const llvm::SubtargetInfoKV " << Target + << "ProcSchedKV[] = {\n"; // For each processor for (Record *Processor : ProcessorList) { StringRef Name = Processor->getValueAsString("Name"); @@ -1429,8 +1436,33 @@ // Emit as { "cpu", procinit }, OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " },\n"; } + // End processor->sched model table + OS << "};\n"; - // End processor table + // Begin processor->pfm counters table + OS << "\n"; + OS << "// Sorted (by key) array of pfm counters for CPU subtype.\n" + << "extern const llvm::SubtargetInfoKV " << Target << "ProcPfmKV[] = {\n"; + // For each processor + for (Record *Processor : ProcessorList) { + const Record *Def = Processor->getValueAsDef("PfmCounters"); + const auto CycleCounter = + Def->getValueAsDef("CycleCounter")->getValueAsString("Counter"); + const auto UopsCounter = + Def->getValueAsDef("UopsCounter")->getValueAsString("Counter"); + const size_t NumIssueCounters = + Def->getValueAsListOfDefs("IssueCounters").size(); + + // Emit as { "cpu", procinit }, + OS << " { \"" << Processor->getValueAsString("Name") + << "\", (const void *)&"; + if (CycleCounter.empty() && UopsCounter.empty() && NumIssueCounters == 0) + OS << "MCPfmCountersInfo::getDefault()"; + else + OS << Target << Def->getName(); + OS << " },\n"; + } + // End processor->pfm counters table OS << "};\n"; } @@ -1447,6 +1479,8 @@ << "#define DBGFIELD(x)\n" << "#endif\n"; + EmitPfmCounters(OS); + if
(SchedModels.hasItineraries()) { std::vector> ProcItinLists; // Emit the stage data @@ -1671,7 +1705,7 @@ // Emit target predicates. emitSchedModelHelpersImpl(OS); - + OS << "} // " << ClassName << "::resolveSchedClass\n\n"; OS << "unsigned " << ClassName @@ -1768,11 +1802,12 @@ << " StringRef CPU, StringRef FS, ArrayRef PF,\n" << " ArrayRef PD,\n" << " const SubtargetInfoKV *ProcSched,\n" + << " const SubtargetInfoKV *ProcPfm,\n" << " const MCWriteProcResEntry *WPR,\n" << " const MCWriteLatencyEntry *WL,\n" << " const MCReadAdvanceEntry *RA, const InstrStage *IS,\n" << " const unsigned *OC, const unsigned *FP) :\n" - << " MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched,\n" + << " MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, ProcPfm,\n" << " WPR, WL, RA, IS, OC, FP) { }\n\n" << " unsigned resolveVariantSchedClass(unsigned SchedClass,\n" << " const MCInst *MI, unsigned CPUID) const override {\n" @@ -1854,10 +1889,9 @@ else OS << "None, "; OS << '\n'; OS.indent(22); - OS << Target << "ProcSchedKV, " - << Target << "WriteProcResTable, " - << Target << "WriteLatencyTable, " - << Target << "ReadAdvanceTable, "; + OS << Target << "ProcSchedKV, " << Target << "ProcPfmKV, " << Target + << "WriteProcResTable, " << Target << "WriteLatencyTable, " << Target + << "ReadAdvanceTable, "; OS << '\n'; OS.indent(22); if (SchedModels.hasItineraries()) { OS << Target << "Stages, " @@ -1923,6 +1957,7 @@ OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n"; OS << "extern const llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n"; OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcSchedKV[];\n"; + OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcPfmKV[];\n"; OS << "extern const llvm::MCWriteProcResEntry " << Target << "WriteProcResTable[];\n"; OS << "extern const llvm::MCWriteLatencyEntry " @@ -1948,10 +1983,9 @@ else OS << "None, "; OS << '\n'; OS.indent(24); - OS << Target << "ProcSchedKV, " - << Target << "WriteProcResTable, " - << 
Target << "WriteLatencyTable, " - << Target << "ReadAdvanceTable, "; + OS << Target << "ProcSchedKV, " << Target << "ProcPfmKV, " << Target + << "WriteProcResTable, " << Target << "WriteLatencyTable, " << Target + << "ReadAdvanceTable, "; OS << '\n'; OS.indent(24); if (SchedModels.hasItineraries()) { OS << Target << "Stages, "