diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -278,6 +278,14 @@ If non-empty, write inconsistencies found during analysis to this file. `-` prints to stdout. By default, this analysis is not run. +.. option:: -analysis-filter=[all|reg-only|mem-only] + + By default, all benchmark results are analysed, but sometimes it may be useful + to only look at those that do not involve memory, or vice versa. This option + allows you to either keep all benchmarks, or filter out (ignore) either all the + ones that do involve memory (involve instructions that may read or write to + memory), or the opposite, to only keep such benchmarks. + .. option:: -analysis-clustering=[dbscan,naive] Specify the clustering algorithm to use. By default DBSCAN will be used. diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-filter.test b/llvm/test/tools/llvm-exegesis/X86/analysis-filter.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/analysis-filter.test @@ -0,0 +1,110 @@ +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck %s --check-prefixes=CHECK-CLUSTERS-ALL +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=all | FileCheck %s --check-prefixes=CHECK-CLUSTERS-ALL +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=reg-only | FileCheck %s --check-prefixes=CHECK-CLUSTERS,CHECK-CLUSTERS-REG +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- 
-analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=mem-only | FileCheck %s --check-prefixes=CHECK-CLUSTERS,CHECK-CLUSTERS-MEM + +# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,NumMicroOps{{$}} + +# CHECK-CLUSTERS-ALL: {{^}}0, +# CHECK-CLUSTERS-ALL-SAME: ,42.00{{$}} +# CHECK-CLUSTERS-ALL: {{^}}1, +# CHECK-CLUSTERS-ALL-SAME: ,840.00{{$}} + +# CHECK-CLUSTERS-REG: {{^}}0, +# CHECK-CLUSTERS-REG-SAME: ,42.00{{$}} + +# CHECK-CLUSTERS-MEM: {{^}}0, +# CHECK-CLUSTERS-MEM-SAME: ,840.00{{$}} + +# CHECK-CLUSTERS-NOT: {{^[0-9]+}}, + +--- +mode: uops +key: + instructions: + - 'ADDPSrr XMM8 XMM8 XMM9' + - 'ADDPSrr XMM1 XMM1 XMM13' + - 'ADDPSrr XMM12 XMM12 XMM3' + - 'ADDPSrr XMM7 XMM7 XMM13' + - 'ADDPSrr XMM5 XMM5 XMM11' + - 'ADDPSrr XMM2 XMM2 XMM14' + - 'ADDPSrr XMM6 XMM6 XMM14' + - 'ADDPSrr XMM0 XMM0 XMM4' + - 'ADDPSrr XMM10 XMM10 XMM9' + - 'ADDPSrr XMM15 XMM15 XMM9' + config: '' + register_initial_values: + - 'XMM8=0x0' + - 'XMM9=0x0' + - 'MXCSR=0x0' + - 'XMM1=0x0' + - 'XMM13=0x0' + - 'XMM12=0x0' + - 'XMM3=0x0' + - 'XMM7=0x0' + - 'XMM5=0x0' + - 'XMM11=0x0' + - 'XMM2=0x0' + - 'XMM14=0x0' + - 'XMM6=0x0' + - 'XMM0=0x0' + - 'XMM4=0x0' + - 'XMM10=0x0' + - 'XMM15=0x0' +cpu_name: znver3 +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: NumMicroOps, value: 42, per_snippet_value: 420 } +error: '' +info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, randomizing registers +assembled_snippetmode: uops +key: + instructions: + - 'ADDPSrm XMM11 XMM11 RDI i_0x1 %noreg i_0x0 %noreg' + - 'ADDPSrm XMM6 XMM6 RDI i_0x1 %noreg i_0x40 %noreg' + - 'ADDPSrm XMM12 XMM12 RDI i_0x1 %noreg i_0x80 %noreg' + - 'ADDPSrm XMM8 XMM8 RDI i_0x1 %noreg i_0xc0 %noreg' + - 'ADDPSrm XMM15 XMM15 RDI i_0x1 %noreg i_0x100 %noreg' + - 'ADDPSrm XMM2 XMM2 RDI i_0x1 %noreg i_0x140 %noreg' + - 'ADDPSrm XMM13 XMM13 RDI i_0x1 
%noreg i_0x180 %noreg' + - 'ADDPSrm XMM0 XMM0 RDI i_0x1 %noreg i_0x1c0 %noreg' + - 'ADDPSrm XMM14 XMM14 RDI i_0x1 %noreg i_0x200 %noreg' + - 'ADDPSrm XMM10 XMM10 RDI i_0x1 %noreg i_0x240 %noreg' + - 'ADDPSrm XMM7 XMM7 RDI i_0x1 %noreg i_0x280 %noreg' + - 'ADDPSrm XMM3 XMM3 RDI i_0x1 %noreg i_0x2c0 %noreg' + - 'ADDPSrm XMM1 XMM1 RDI i_0x1 %noreg i_0x300 %noreg' + - 'ADDPSrm XMM4 XMM4 RDI i_0x1 %noreg i_0x340 %noreg' + - 'ADDPSrm XMM5 XMM5 RDI i_0x1 %noreg i_0x380 %noreg' + - 'ADDPSrm XMM9 XMM9 RDI i_0x1 %noreg i_0x3c0 %noreg' + config: '' + register_initial_values: + - 'XMM11=0x0' + - 'MXCSR=0x0' + - 'XMM6=0x0' + - 'XMM12=0x0' + - 'XMM8=0x0' + - 'XMM15=0x0' + - 'XMM2=0x0' + - 'XMM13=0x0' + - 'XMM0=0x0' + - 'XMM14=0x0' + - 'XMM10=0x0' + - 'XMM7=0x0' + - 'XMM3=0x0' + - 'XMM1=0x0' + - 'XMM4=0x0' + - 'XMM5=0x0' + - 'XMM9=0x0' +cpu_name: znver3 +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: NumMicroOps, value: 840, per_snippet_value: 8400 } +error: '' +info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, randomizing registers +assembled_snippetdiff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -34,6 +34,8 @@ namespace exegesis { +enum class InstructionBenchmarkFilter { All, RegOnly, WithMem }; + struct InstructionBenchmarkKey { // The LLVM opcode name. 
std::vector Instructions; diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -144,6 +144,18 @@ cl::desc("ignore instructions that do not define a sched class"), cl::cat(BenchmarkOptions), cl::init(false)); +static cl::opt AnalysisSnippetFilter( + "analysis-filter", cl::desc("Filter the benchmarks before analysing them"), + cl::cat(BenchmarkOptions), + cl::values( + clEnumValN(exegesis::InstructionBenchmarkFilter::All, "all", + "Keep all benchmarks (default)"), + clEnumValN(exegesis::InstructionBenchmarkFilter::RegOnly, "reg-only", + "Keep only those benchmarks that do *NOT* involve memory"), + clEnumValN(exegesis::InstructionBenchmarkFilter::WithMem, "mem-only", + "Keep only the benchmarks that *DO* involve memory")), + cl::init(exegesis::InstructionBenchmarkFilter::All)); + static cl::opt AnalysisClusteringAlgorithm( "analysis-clustering", cl::desc("the clustering algorithm to use"), @@ -495,6 +507,26 @@ ExitOnFileError(OutputFilename, std::move(Err)); } +static void filterPoints(MutableArrayRef Points, + const MCInstrInfo &MCII) { + if (AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::All) + return; + + bool WantPointsWithMemOps = + AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::WithMem; + for (InstructionBenchmark &Point : Points) { + if (!Point.Error.empty()) + continue; + if (WantPointsWithMemOps == + any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) { + const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); + return MCDesc.mayLoad() || MCDesc.mayStore(); + })) + continue; + Point.Error = "filtered out by user"; + } +} + static void analysisMain() { ExitOnErr.setBanner("llvm-exegesis: "); if (BenchmarkFile.empty()) @@ -540,7 +572,7 @@ // Read benchmarks. 
const LLVMState State = ExitOnErr( LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName)); - const std::vector Points = ExitOnFileError( + std::vector Points = ExitOnFileError( BenchmarkFile, InstructionBenchmark::readYamls(State, *MemoryBuffer)); outs() << "Parsed " << Points.size() << " benchmark points\n"; @@ -550,6 +582,8 @@ } // FIXME: Merge points from several runs (latency and uops). + filterPoints(Points, State.getInstrInfo()); + const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create( Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints, AnalysisClusteringEpsilon, &State.getSubtargetInfo(),