Index: test/tools/llvm-exegesis/X86/analysis-uops-variant.test =================================================================== --- /dev/null +++ test/tools/llvm-exegesis/X86/analysis-uops-variant.test @@ -0,0 +1,26 @@ +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file="" -analysis-numpoints=1 | FileCheck %s + +# CHECK: cluster_id,opcode_name,config,sched_class,SBPort0,SBPort1,SBPort23,SBPort4,SBPort5,NumMicroOps +# CHECK-NEXT: SBWriteZeroLatency + +--- +mode: uops +key: + instructions: + - 'XOR32rr EAX EAX EAX' + config: '' + register_initial_values: +cpu_name: sandybridge +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: SBPort0, value: 0.0012, per_snippet_value: 0.0012 } + - { key: SBPort1, value: 0.0021, per_snippet_value: 0.0021 } + - { key: SBPort23, value: 0.0013, per_snippet_value: 0.0013 } + - { key: SBPort4, value: 0.0018, per_snippet_value: 0.0018 } + - { key: SBPort5, value: 0.0012, per_snippet_value: 0.0012 } + - { key: NumMicroOps, value: 1.0108, per_snippet_value: 1.0108 } +error: '' +info: '' +assembled_snippet: 31C031C031C031C031C031C031C031C031C031C031C031C031C031C031C031C0C3 +... Index: tools/llvm-exegesis/lib/Analysis.h =================================================================== --- tools/llvm-exegesis/lib/Analysis.h +++ tools/llvm-exegesis/lib/Analysis.h @@ -49,11 +49,12 @@ using ClusterId = InstructionBenchmarkClustering::ClusterId; // An llvm::MCSchedClassDesc augmented with some additional data. - struct SchedClass { - SchedClass(const llvm::MCSchedClassDesc &SD, - const llvm::MCSubtargetInfo &STI); + struct ResolvedSchedClass { + ResolvedSchedClass(const llvm::MCSubtargetInfo &STI, + unsigned ResolvedSchedClassId, bool WasVariant); const llvm::MCSchedClassDesc *const SCDesc; + const bool WasVariant; // Whether the original class was variant. 
const llvm::SmallVector NonRedundantWriteProcRes; const std::vector> IdealizedProcResPressure; @@ -75,7 +76,8 @@ // Returns true if the cluster representative measurements match that of SC. bool - measurementsMatch(const llvm::MCSubtargetInfo &STI, const SchedClass &SC, + measurementsMatch(const llvm::MCSubtargetInfo &STI, + const ResolvedSchedClass &SC, const InstructionBenchmarkClustering &Clustering) const; void addPoint(size_t PointId, @@ -92,15 +94,21 @@ void printSchedClassClustersHtml(const std::vector &Clusters, - const SchedClass &SC, + const ResolvedSchedClass &SC, llvm::raw_ostream &OS) const; - void printSchedClassDescHtml(const SchedClass &SC, + void printSchedClassDescHtml(const ResolvedSchedClass &SC, llvm::raw_ostream &OS) const; - // Builds a map of Sched Class -> indices of points that belong to the sched - // class. - std::unordered_map> - makePointsPerSchedClass() const; + // A pair of (Sched Class, indices of points that belong to the sched + // class). + struct ResolvedSchedClassAndPoints { + explicit ResolvedSchedClassAndPoints(ResolvedSchedClass &&RSC); + + ResolvedSchedClass RSC; + std::vector PointIds; + }; + // Builds a list of ResolvedSchedClassAndPoints. 
+ std::vector makePointsPerSchedClass() const; template void writeSnippet(llvm::raw_ostream &OS, llvm::ArrayRef Bytes, Index: tools/llvm-exegesis/lib/Analysis.cpp =================================================================== --- tools/llvm-exegesis/lib/Analysis.cpp +++ tools/llvm-exegesis/lib/Analysis.cpp @@ -19,6 +19,16 @@ static const char kCsvSep = ','; +static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI, + unsigned SchedClassId, + const llvm::MCInst &MCI) { + const auto &SM = STI.getSchedModel(); + while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant()) + SchedClassId = + STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID()); + return SchedClassId; +} + namespace { enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; @@ -126,11 +136,12 @@ writeEscaped(OS, Point.Key.Config); OS << kCsvSep; assert(!Point.Key.Instructions.empty()); - // FIXME: Resolve variant classes. - const unsigned SchedClassId = - InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass(); -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const auto &SchedModel = SubtargetInfo_->getSchedModel(); + const llvm::MCInst &MCI = Point.Key.Instructions[0]; + const unsigned SchedClassId = resolveSchedClassId( + *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI); + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const llvm::MCSchedClassDesc *const SCDesc = SchedModel.getSchedClassDesc(SchedClassId); writeEscaped(OS, SCDesc->Name); @@ -193,21 +204,43 @@ return llvm::Error::success(); } -std::unordered_map> +Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( + ResolvedSchedClass &&RSC) + : RSC(std::move(RSC)) {} + +std::vector Analysis::makePointsPerSchedClass() const { - std::unordered_map> PointsPerSchedClass; + std::vector Entries; + // Maps SchedClassIds to index in result. 
+ std::unordered_map SchedClassIdToIndex; const auto &Points = Clustering_.getPoints(); for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { const InstructionBenchmark &Point = Points[PointId]; if (!Point.Error.empty()) continue; assert(!Point.Key.Instructions.empty()); - const auto Opcode = Point.Key.Instructions[0].getOpcode(); - // FIXME: Resolve variant classes. - PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back( - PointId); + // FIXME: we should be using the tuple of classes for instructions in the + // snippet as key. + const llvm::MCInst &MCI = Point.Key.Instructions[0]; + unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass(); + const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel() + .getSchedClassDesc(SchedClassId) + ->isVariant(); + SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI); + const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); + if (IndexIt == SchedClassIdToIndex.end()) { + // Create a new entry. + SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); + ResolvedSchedClassAndPoints Entry( + ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant)); + Entry.PointIds.push_back(PointId); + Entries.push_back(std::move(Entry)); + } else { + // Append to the existing entry. + Entries[IndexIt->second].PointIds.push_back(PointId); + } } - return PointsPerSchedClass; + return Entries; } // Uops repeat the same opcode over again. Just show this opcode and show the @@ -239,8 +272,8 @@ } void Analysis::printSchedClassClustersHtml( - const std::vector &Clusters, const SchedClass &SC, - llvm::raw_ostream &OS) const { + const std::vector &Clusters, + const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const { const auto &Points = Clustering_.getPoints(); OS << ""; OS << ""; @@ -254,7 +287,7 @@ OS << ""; for (const SchedClassCluster &Cluster : Clusters) { OS << "
ClusterIdOpcode/Config
"; @@ -369,12 +402,17 @@ return Result; } -Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD, - const llvm::MCSubtargetInfo &STI) - : SCDesc(&SD), - NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)), +Analysis::ResolvedSchedClass::ResolvedSchedClass( + const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId, + bool WasVariant) + : SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)), + WasVariant(WasVariant), + NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)), IdealizedProcResPressure(computeIdealizedProcResPressure( - STI.getSchedModel(), NonRedundantWriteProcRes)) {} + STI.getSchedModel(), NonRedundantWriteProcRes)) { + assert((SCDesc == nullptr || !SCDesc->isVariant()) && + "ResolvedSchedClass should never be variant"); +} void Analysis::SchedClassCluster::addPoint( size_t PointId, const InstructionBenchmarkClustering &Clustering) { @@ -407,7 +445,7 @@ } bool Analysis::SchedClassCluster::measurementsMatch( - const llvm::MCSubtargetInfo &STI, const SchedClass &SC, + const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, const InstructionBenchmarkClustering &Clustering) const { const size_t NumMeasurements = Representative.size(); std::vector ClusterCenterPoint(NumMeasurements); @@ -424,9 +462,9 @@ } // Find the latency. SchedClassPoint[0].PerInstructionValue = 0.0; - for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) { + for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) { const llvm::MCWriteLatencyEntry *const WLE = - STI.getWriteLatencyEntry(SC.SCDesc, I); + STI.getWriteLatencyEntry(RSC.SCDesc, I); SchedClassPoint[0].PerInstructionValue = std::max(SchedClassPoint[0].PerInstructionValue, WLE->Cycles); } @@ -438,17 +476,17 @@ if (ProcResIdx > 0) { // Find the pressure on ProcResIdx `Key`. 
const auto ProcResPressureIt = - std::find_if(SC.IdealizedProcResPressure.begin(), - SC.IdealizedProcResPressure.end(), + std::find_if(RSC.IdealizedProcResPressure.begin(), + RSC.IdealizedProcResPressure.end(), [ProcResIdx](const std::pair &WPR) { return WPR.first == ProcResIdx; }); SchedClassPoint[I].PerInstructionValue = - ProcResPressureIt == SC.IdealizedProcResPressure.end() + ProcResPressureIt == RSC.IdealizedProcResPressure.end() ? 0.0 : ProcResPressureIt->second; } else if (Key == "NumMicroOps") { - SchedClassPoint[I].PerInstructionValue = SC.SCDesc->NumMicroOps; + SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps; } else { llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes " "name, got " @@ -465,26 +503,25 @@ return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint); } -void Analysis::printSchedClassDescHtml(const SchedClass &SC, +void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const { OS << ""; OS << ""; - if (SC.SCDesc->isValid()) { + if (RSC.SCDesc->isValid()) { const auto &SM = SubtargetInfo_->getSchedModel(); OS << ""; - OS << ""; - OS << ""; + OS << ""; + OS << ""; // Latencies. OS << ""; // WriteProcRes. OS << ""; // Idealized port pressure. OS << "
ValidVariantNumMicroOpsLatencyWriteProcResIdealized " "Resource Pressure
" << (SC.SCDesc->isVariant() ? "✔" : "✕") - << "" << SC.SCDesc->NumMicroOps << "" << (RSC.WasVariant ? "✔" : "✕") << "" << RSC.SCDesc->NumMicroOps << "
    "; - for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { + for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { const auto *const Entry = - SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I); + SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I); OS << "
  • " << Entry->Cycles; - if (SC.SCDesc->NumWriteLatencyEntries > 1) { + if (RSC.SCDesc->NumWriteLatencyEntries > 1) { - // Dismabiguate if more than 1 latency. + // Disambiguate if more than 1 latency. OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; } @@ -493,7 +530,7 @@ OS << "
    "; - for (const auto &WPR : SC.NonRedundantWriteProcRes) { + for (const auto &WPR : RSC.NonRedundantWriteProcRes) { OS << "
  • "; writeEscaped(OS, SM.getProcResource(WPR.ProcResourceIdx)->Name); @@ -502,7 +539,7 @@ OS << "
    "; - for (const auto &Pressure : SC.IdealizedProcResPressure) { + for (const auto &Pressure : RSC.IdealizedProcResPressure) { OS << "
  • "; writeEscaped(OS, SubtargetInfo_->getSchedModel() .getProcResource(Pressure.first) @@ -598,19 +635,12 @@ writeEscaped(OS, FirstPoint.CpuName); OS << ""; - for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) { - const auto SchedClassId = SchedClassAndPoints.first; - const std::vector &SchedClassPoints = SchedClassAndPoints.second; - const auto &SchedModel = SubtargetInfo_->getSchedModel(); - const llvm::MCSchedClassDesc *const SCDesc = - SchedModel.getSchedClassDesc(SchedClassId); - if (!SCDesc) + for (const auto &RSCAndPoints : makePointsPerSchedClass()) { + if (!RSCAndPoints.RSC.SCDesc) continue; - const SchedClass SC(*SCDesc, *SubtargetInfo_); - // Bucket sched class points into sched class clusters. std::vector SchedClassClusters; - for (const size_t PointId : SchedClassPoints) { + for (const size_t PointId : RSCAndPoints.PointIds) { const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); if (!ClusterId.isValid()) continue; // Ignore noise and errors. FIXME: take noise into account ? @@ -629,24 +659,24 @@ // Print any scheduling class that has at least one cluster that does not // match the checked-in data. if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(), - [this, &SC](const SchedClassCluster &C) { - return C.measurementsMatch(*SubtargetInfo_, SC, - Clustering_); + [this, &RSCAndPoints](const SchedClassCluster &C) { + return C.measurementsMatch(*SubtargetInfo_, + RSCAndPoints.RSC, Clustering_); })) continue; // Nothing weird. OS << "

    Sched Class ";
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-    writeEscaped(OS, SCDesc->Name);
+    writeEscaped(OS, RSCAndPoints.RSC.SCDesc->Name);
 #else
-    OS << SchedClassId;
+    {
+      // The SchedClassId local was removed from this loop, so it cannot be
+      // printed directly when sched class names are compiled out. Entries are
+      // keyed by the resolved class id of their points' first instruction and
+      // always hold at least one point, so recompute the id from the first one.
+      const llvm::MCInst &FirstMCI =
+          Clustering_.getPoints()[RSCAndPoints.PointIds.front()]
+              .Key.Instructions[0];
+      OS << resolveSchedClassId(
+          *SubtargetInfo_,
+          InstrInfo_->get(FirstMCI.getOpcode()).getSchedClass(), FirstMCI);
+    }
 #endif
     OS << " contains instructions whose performance characteristics do" " not match that of LLVM:

    "; - printSchedClassClustersHtml(SchedClassClusters, SC, OS); + printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); OS << "

    llvm SchedModel data:

    "; - printSchedClassDescHtml(SC, OS); + printSchedClassDescHtml(RSCAndPoints.RSC, OS); OS << "
    "; }