Index: llvm/trunk/docs/CommandGuide/llvm-exegesis.rst =================================================================== --- llvm/trunk/docs/CommandGuide/llvm-exegesis.rst +++ llvm/trunk/docs/CommandGuide/llvm-exegesis.rst @@ -219,11 +219,16 @@ Specify the numPoints parameters to be used for DBSCAN clustering (`analysis` mode). -.. option:: -analysis-epsilon= +.. option:: -analysis-clustering-epsilon= - Specify the numPoints parameters to be used for DBSCAN clustering + Specify the epsilon parameter used for clustering of benchmark points (`analysis` mode). +.. option:: -analysis-inconsistency-epsilon= + + Specify the epsilon parameter used for detection of when the cluster + is different from the LLVM schedule profile values (`analysis` mode). + .. option:: -analysis-display-unstable-clusters If there is more than one benchmark for an opcode, said benchmarks may end up Index: llvm/trunk/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization.test =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization.test +++ llvm/trunk/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization.test @@ -1,6 +1,6 @@ -# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS %s -# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-epsilon=0.5 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-STABLE %s -# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-UNSTABLE %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-STABLE %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-ALL,CHECK-INCONSISTENCIES-UNSTABLE %s # We have one ADD32rr measurement, and two measurements for SQRTSSr. # The ADD32rr measurement and one of the SQRTSSr measurements are identical, Index: llvm/trunk/test/tools/llvm-exegesis/X86/analysis-epsilons.test =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/analysis-epsilons.test +++ llvm/trunk/test/tools/llvm-exegesis/X86/analysis-epsilons.test @@ -0,0 +1,63 @@ +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-TWO %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-TWO %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-ONE %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-CLUSTERS-ALL,CHECK-CLUSTERS-ONE %s + +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-FAIL %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-FAIL %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=9 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-PASS %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=10 -analysis-inconsistency-epsilon=100 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-INCONSISTENCIES-PASS %s + +# CHECK-CLUSTERS-ALL: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}} + +# CHECK-CLUSTERS-TWO: {{^}}0, +# CHECK-CLUSTERS-TWO-SAME: ,90.00{{$}} +# CHECK-CLUSTERS-TWO: {{^}}1, +# CHECK-CLUSTERS-TWO-SAME: ,100.00{{$}} + +# CHECK-CLUSTERS-ONE: {{^}}0, +# CHECK-CLUSTERS-ONE-SAME: ,90.00{{$}} +# CHECK-CLUSTERS-ONE-NEXT: {{^}}0, +# CHECK-CLUSTERS-ONE-SAME: ,100.00{{$}} + +# CHECK-INCONSISTENCIES-FAIL: contains instructions whose performance characteristics do not match that of LLVM +# CHECK-INCONSISTENCIES-FAIL: contains instructions whose performance characteristics do not match that of LLVM +# CHECK-INCONSISTENCIES-FAIL-NOT: contains instructions whose performance characteristics do not match that of LLVM + +# CHECK-INCONSISTENCIES-PASS-NOT: contains instructions whose performance characteristics do not match that of LLVM + +--- +mode: latency +key: + instructions: + - 'ADD32rr EDX EDX EAX' + config: '' + register_initial_values: + - 'EDX=0x0' + - 'EAX=0x0' +cpu_name: bdver2 +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: latency, value: 90, per_snippet_value: 90 } +error: '' +info: Repeating a single implicitly serial instruction +assembled_snippet: BA00000000B80000000001C201C201C201C201C201C201C201C201C201C201C201C201C201C201C201C2C3 +... +--- +mode: latency +key: + instructions: + - 'SQRTSSr XMM11 XMM11' + config: '' + register_initial_values: + - 'XMM11=0x0' +cpu_name: bdver2 +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: latency, value: 100, per_snippet_value: 100 } +error: '' +info: Repeating a single explicitly serial instruction +assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C410F3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBC3 +... Index: llvm/trunk/tools/llvm-exegesis/lib/Analysis.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Analysis.h +++ llvm/trunk/tools/llvm-exegesis/lib/Analysis.h @@ -38,6 +38,7 @@ Analysis(const llvm::Target &Target, std::unique_ptr InstrInfo, const InstructionBenchmarkClustering &Clustering, + double AnalysisInconsistencyEpsilon, bool AnalysisDisplayUnstableOpcodes); // Prints a csv of instructions for each cluster. @@ -81,7 +82,8 @@ bool measurementsMatch(const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &SC, - const InstructionBenchmarkClustering &Clustering) const; + const InstructionBenchmarkClustering &Clustering, + const double AnalysisInconsistencyEpsilonSquared_) const; void addPoint(size_t PointId, const InstructionBenchmarkClustering &Clustering); @@ -127,6 +129,7 @@ std::unique_ptr AsmInfo_; std::unique_ptr InstPrinter_; std::unique_ptr Disasm_; + const double AnalysisInconsistencyEpsilonSquared_; const bool AnalysisDisplayUnstableOpcodes_; }; Index: llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp @@ -170,8 +170,11 @@ Analysis::Analysis(const llvm::Target &Target, std::unique_ptr InstrInfo, const InstructionBenchmarkClustering &Clustering, + double AnalysisInconsistencyEpsilon, bool AnalysisDisplayUnstableOpcodes) : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)), + AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * + AnalysisInconsistencyEpsilon), AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { if (Clustering.getPoints().empty()) return; @@ -301,7 +304,8 @@ OS << ""; for (const SchedClassCluster &Cluster : Clusters) { OS << ""; @@ -461,7 +465,8 @@ bool Analysis::SchedClassCluster::measurementsMatch( const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, - const InstructionBenchmarkClustering &Clustering) const { + const InstructionBenchmarkClustering &Clustering, + const double AnalysisInconsistencyEpsilonSquared_) const { const size_t NumMeasurements = Representative.size(); std::vector ClusterCenterPoint(NumMeasurements); std::vector SchedClassPoint(NumMeasurements); @@ -520,7 +525,8 @@ llvm_unreachable("unimplemented measurement matching mode"); return false; } - return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint); + return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint, + AnalysisInconsistencyEpsilonSquared_); } void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, @@ -689,7 +695,8 @@ if (llvm::all_of(SchedClassClusters, [this, &RSCAndPoints](const SchedClassCluster &C) { return C.measurementsMatch( - *SubtargetInfo_, RSCAndPoints.RSC, Clustering_); + *SubtargetInfo_, RSCAndPoints.RSC, Clustering_, + AnalysisInconsistencyEpsilonSquared_); })) continue; // Nothing weird. Index: llvm/trunk/tools/llvm-exegesis/lib/Clustering.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Clustering.h +++ llvm/trunk/tools/llvm-exegesis/lib/Clustering.h @@ -29,7 +29,8 @@ // for more explanations on the algorithm. static llvm::Expected create(const std::vector &Points, size_t MinPts, - double Epsilon, llvm::Optional NumOpcodes = llvm::None); + double AnalysisClusteringEpsilon, + llvm::Optional NumOpcodes = llvm::None); class ClusterId { public: @@ -103,7 +104,8 @@ // Returns true if the given point is within a distance Epsilon of each other. bool isNeighbour(const std::vector &P, - const std::vector &Q) const { + const std::vector &Q, + const double EpsilonSquared_) const { double DistanceSquared = 0.0; for (size_t I = 0, E = P.size(); I < E; ++I) { const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; @@ -114,7 +116,8 @@ private: InstructionBenchmarkClustering( - const std::vector &Points, double EpsilonSquared); + const std::vector &Points, + double AnalysisClusteringEpsilonSquared); llvm::Error validateAndSetup(); void dbScan(size_t MinPts); @@ -122,7 +125,7 @@ void rangeQuery(size_t Q, std::vector &Scratchpad) const; const std::vector &Points_; - const double EpsilonSquared_; + const double AnalysisClusteringEpsilonSquared_; int NumDimensions_ = 0; // ClusterForPoint_[P] is the cluster id for Points[P]. std::vector ClusterIdForPoint_; Index: llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/Clustering.cpp @@ -46,7 +46,8 @@ const auto &PMeasurements = Points_[P].Measurements; if (PMeasurements.empty()) // Error point. continue; - if (isNeighbour(PMeasurements, QMeasurements)) { + if (isNeighbour(PMeasurements, QMeasurements, + AnalysisClusteringEpsilonSquared_)) { Neighbors.push_back(P); } } @@ -54,8 +55,9 @@ InstructionBenchmarkClustering::InstructionBenchmarkClustering( const std::vector &Points, - const double EpsilonSquared) - : Points_(Points), EpsilonSquared_(EpsilonSquared), + const double AnalysisClusteringEpsilonSquared) + : Points_(Points), + AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared), NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {} llvm::Error InstructionBenchmarkClustering::validateAndSetup() { @@ -245,8 +247,10 @@ llvm::Expected InstructionBenchmarkClustering::create( const std::vector &Points, const size_t MinPts, - const double Epsilon, llvm::Optional NumOpcodes) { - InstructionBenchmarkClustering Clustering(Points, Epsilon * Epsilon); + const double AnalysisClusteringEpsilon, + llvm::Optional NumOpcodes) { + InstructionBenchmarkClustering Clustering( + Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon); if (auto Error = Clustering.validateAndSetup()) { return std::move(Error); } Index: llvm/trunk/tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/llvm-exegesis.cpp +++ llvm/trunk/tools/llvm-exegesis/llvm-exegesis.cpp @@ -84,10 +84,15 @@ "analysis-numpoints", cl::desc("minimum number of points in an analysis cluster"), cl::init(3)); -static cl::opt - AnalysisEpsilon("analysis-epsilon", - cl::desc("dbscan epsilon for analysis clustering"), - cl::init(0.1)); +static cl::opt AnalysisClusteringEpsilon( + "analysis-clustering-epsilon", + cl::desc("dbscan epsilon for benchmark point clustering"), cl::init(0.1)); + +static cl::opt AnalysisInconsistencyEpsilon( + "analysis-inconsistency-epsilon", + cl::desc("epsilon for detection of when the cluster is different from the " + "LLVM schedule profile values"), + cl::init(0.1)); static cl::opt AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""), @@ -444,9 +449,11 @@ std::unique_ptr InstrInfo(TheTarget->createMCInstrInfo()); const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create( - Points, AnalysisNumPoints, AnalysisEpsilon, InstrInfo->getNumOpcodes())); + Points, AnalysisNumPoints, AnalysisClusteringEpsilon, + InstrInfo->getNumOpcodes())); const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering, + AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes); maybeRunAnalysis(Analyzer, "analysis clusters",