diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -195,11 +195,23 @@ to specify at least one of the `-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`. -.. option:: -num-repetitions= +.. option:: -num-repetitions= Specify the number of repetitions of the asm snippet. Higher values lead to more accurate measurements but lengthen the benchmark. +.. option:: -max-configs-per-opcode= + + Specify the maximum number of configurations that can be generated for each opcode. + By default this is `1`, meaning that we assume that a single measurement is + enough to characterize an opcode. This might not be true of all instructions: + for example, the performance characteristics of the LEA instruction on X86 + depend on the values of assigned registers and immediates. Setting a value of + `-max-configs-per-opcode` larger than `1` allows `llvm-exegesis` to explore + more configurations to discover if some register or immediate assignments + lead to different performance characteristics. + + .. option:: -benchmarks-file= File to read (`analysis` mode) or write (`latency`/`uops`/`inverse_throughput` diff --git a/llvm/test/tools/llvm-exegesis/X86/max-configs.test b/llvm/test/tools/llvm-exegesis/X86/max-configs.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/max-configs.test @@ -0,0 +1,24 @@ +# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=1 | FileCheck -check-prefixes=CHECK,CHECK1 %s +# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=2 | FileCheck -check-prefixes=CHECK,CHECK2 %s + +CHECK: --- +CHECK-NEXT: mode: latency +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: SBB8rr +CHECK-NEXT: config: '' +CHECK-NEXT: register_initial_values: +CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' +CHECK-LAST: ... 
+ +CHECK1-NOT: SBB8rr + +CHECK2: --- +CHECK2-NEXT: mode: latency +CHECK2-NEXT: key: +CHECK2-NEXT: instructions: +CHECK2-NEXT: SBB8rr +CHECK2-NEXT: config: '' +CHECK2-NEXT: register_initial_values: +CHECK2-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' +CHECK2-LAST: ... diff --git a/llvm/tools/llvm-exegesis/lib/Latency.h b/llvm/tools/llvm-exegesis/lib/Latency.h --- a/llvm/tools/llvm-exegesis/lib/Latency.h +++ b/llvm/tools/llvm-exegesis/lib/Latency.h @@ -24,7 +24,7 @@ class LatencySnippetGenerator : public SnippetGenerator { public: - LatencySnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + using SnippetGenerator::SnippetGenerator; ~LatencySnippetGenerator() override; llvm::Expected> diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -51,7 +51,11 @@ // Common code for all benchmark modes. class SnippetGenerator { public: - explicit SnippetGenerator(const LLVMState &State); + struct Options { + unsigned MaxConfigsPerOpcode = 1; + }; + + explicit SnippetGenerator(const LLVMState &State, const Options &Opts); virtual ~SnippetGenerator(); @@ -66,6 +70,7 @@ protected: const LLVMState &State; + const Options Opts; private: // API to be implemented by subclasses. 
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -33,7 +33,8 @@ SnippetGeneratorFailure::SnippetGeneratorFailure(const llvm::Twine &S) : llvm::StringError(S, llvm::inconvertibleErrorCode()) {} -SnippetGenerator::SnippetGenerator(const LLVMState &State) : State(State) {} +SnippetGenerator::SnippetGenerator(const LLVMState &State, const Options &Opts) + : State(State), Opts(Opts) {} SnippetGenerator::~SnippetGenerator() = default; @@ -81,6 +82,9 @@ computeRegisterInitialValues(CT.Instructions); BC.Key.Config = CT.Config; Output.push_back(std::move(BC)); + if (Output.size() >= Opts.MaxConfigsPerOpcode) + return Output; // Early exit if we exceeded the number of allowed + // configs. } } return Output; diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -125,7 +125,8 @@ // Creates a snippet generator for the given mode. std::unique_ptr createSnippetGenerator(InstructionBenchmark::ModeE Mode, - const LLVMState &State) const; + const LLVMState &State, + const SnippetGenerator::Options &Opts) const; // Creates a benchmark runner for the given mode. std::unique_ptr createBenchmarkRunner(InstructionBenchmark::ModeE Mode, @@ -151,9 +152,9 @@ // Targets can implement their own snippet generators/benchmarks runners by // implementing these. 
std::unique_ptr virtual createLatencySnippetGenerator( - const LLVMState &State) const; + const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createUopsSnippetGenerator( - const LLVMState &State) const; + const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createLatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode) const; std::unique_ptr virtual createUopsBenchmarkRunner( diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -36,17 +36,17 @@ FirstTarget = Target; } -std::unique_ptr -ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode, - const LLVMState &State) const { +std::unique_ptr ExegesisTarget::createSnippetGenerator( + InstructionBenchmark::ModeE Mode, const LLVMState &State, + const SnippetGenerator::Options &Opts) const { switch (Mode) { case InstructionBenchmark::Unknown: return nullptr; case InstructionBenchmark::Latency: - return createLatencySnippetGenerator(State); + return createLatencySnippetGenerator(State, Opts); case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: - return createUopsSnippetGenerator(State); + return createUopsSnippetGenerator(State, Opts); } return nullptr; } @@ -66,14 +66,14 @@ return nullptr; } -std::unique_ptr -ExegesisTarget::createLatencySnippetGenerator(const LLVMState &State) const { - return std::make_unique(State); +std::unique_ptr ExegesisTarget::createLatencySnippetGenerator( + const LLVMState &State, const SnippetGenerator::Options &Opts) const { + return std::make_unique(State, Opts); } -std::unique_ptr -ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const { - return std::make_unique(State); +std::unique_ptr ExegesisTarget::createUopsSnippetGenerator( + const LLVMState &State, const SnippetGenerator::Options 
&Opts) const { + return std::make_unique(State, Opts); } std::unique_ptr ExegesisTarget::createLatencyBenchmarkRunner( diff --git a/llvm/tools/llvm-exegesis/lib/Uops.h b/llvm/tools/llvm-exegesis/lib/Uops.h --- a/llvm/tools/llvm-exegesis/lib/Uops.h +++ b/llvm/tools/llvm-exegesis/lib/Uops.h @@ -22,7 +22,7 @@ class UopsSnippetGenerator : public SnippetGenerator { public: - UopsSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + using SnippetGenerator::SnippetGenerator; ~UopsSnippetGenerator() override; llvm::Expected> diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -462,14 +462,16 @@ sizeof(kUnavailableRegisters[0])); } - std::unique_ptr - createLatencySnippetGenerator(const LLVMState &State) const override { - return std::make_unique(State); + std::unique_ptr createLatencySnippetGenerator( + const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique(State, Opts); } - std::unique_ptr - createUopsSnippetGenerator(const LLVMState &State) const override { - return std::make_unique(State); + std::unique_ptr createUopsSnippetGenerator( + const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique(State, Opts); } bool matchesArch(llvm::Triple::ArchType Arch) const override { diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -95,6 +95,12 @@ cl::desc("number of time to repeat the asm snippet"), cl::cat(BenchmarkOptions), cl::init(10000)); +static cl::opt MaxConfigsPerOpcode( + "max-configs-per-opcode", + cl::desc( + "allow to snippet generator to generate at most that many configs"), + cl::cat(BenchmarkOptions), cl::init(1)); + static cl::opt 
IgnoreInvalidSchedClass( "ignore-invalid-sched-class", cl::desc("ignore instructions that do not define a sched class"), @@ -214,8 +220,11 @@ if (InstrDesc.isCall() || InstrDesc.isReturn()) return make_error("Unsupported opcode: isCall/isReturn"); + SnippetGenerator::Options Options; + Options.MaxConfigsPerOpcode = MaxConfigsPerOpcode; const std::unique_ptr Generator = - State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State); + State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State, + Options); if (!Generator) llvm::report_fatal_error("cannot create snippet generator"); return Generator->generateConfigurations(Instr, ForbiddenRegs); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -45,7 +45,7 @@ template class SnippetGeneratorTest : public X86SnippetGeneratorTest { protected: - SnippetGeneratorTest() : Generator(State) {} + SnippetGeneratorTest() : Generator(State, SnippetGenerator::Options()) {} std::vector checkAndGetCodeTemplates(unsigned Opcode) { randomGenerator().seed(0); // Initialize seed. @@ -335,7 +335,8 @@ class FakeSnippetGenerator : public SnippetGenerator { public: - FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + FakeSnippetGenerator(const LLVMState &State, const Options &Opts) + : SnippetGenerator(State, Opts) {} Instruction createInstruction(unsigned Opcode) { return State.getIC().getInstr(Opcode);