Index: docs/CommandGuide/llvm-exegesis.rst =================================================================== --- docs/CommandGuide/llvm-exegesis.rst +++ docs/CommandGuide/llvm-exegesis.rst @@ -24,8 +24,11 @@ The main goal of this tool is to automatically (in)validate the LLVM's TableDef scheduling models. To that end, we also provide analysis of the results. -EXAMPLES: benchmarking ----------------------- +:program:`llvm-exegesis` can also benchmark arbitrary user-provided code +snippets. + +EXAMPLE 1: benchmarking instructions +------------------------------------ Assume you have an X86-64 machine. To measure the latency of a single instruction, run: @@ -75,8 +78,43 @@ FIXME: Provide an :program:`llvm-exegesis` option to test all instructions. -EXAMPLES: analysis ----------------------- + +EXAMPLE 2: benchmarking a custom code snippet +--------------------------------------------- + +To measure the latency/uops of a custom piece of code, you can specify the +`snippets-file` option (`-` reads from standard input). + +.. code-block:: bash + + $ echo "vzeroupper" | llvm-exegesis -mode=uops -snippets-file=- + +Real-life code snippets typically depend on registers or memory. +:program:`llvm-exegesis` checks the liveness of registers (i.e. any register +use has a corresponding def or is a "live in"). If your code depends on the +value of some registers, you have two options: + - Mark the register as requiring a definition. :program:`llvm-exegesis` will + automatically assign a value to the register. This can be done using the + directive `LLVM-EXEGESIS-DEFREG <reg name>`. FIXME: Allow choosing the + immediate value. + - Mark the register as a "live in". :program:`llvm-exegesis` will benchmark + using whatever value was in this register on entry. This can be done using + the directive `LLVM-EXEGESIS-LIVEIN <reg name>`. + +For example, the following code snippet depends on the values of XMM1 (which +will be set by the tool) and the memory buffer passed in RDI (live in). + +.. 
code-block:: none + + # LLVM-EXEGESIS-LIVEIN RDI + # LLVM-EXEGESIS-DEFREG XMM1 + vmulps (%rdi), %xmm1, %xmm2 + vhaddps %xmm2, %xmm2, %xmm3 + addq $0x10, %rdi + + +EXAMPLE 3: analysis +------------------- Assuming you have a set of benchmarked instructions (either latency or uops) as YAML in file `/tmp/benchmarks.yaml`, you can analyze the results using the @@ -132,13 +170,18 @@ .. option:: -opcode-index=<LLVM opcode index> - Specify the opcode to measure, by index. - Either `opcode-index` or `opcode-name` must be set. + Specify the opcode to measure, by index. See example 1 for details. + Exactly one of `opcode-index`, `opcode-name` or `snippets-file` must be set. .. option:: -opcode-name=<LLVM opcode name> - Specify the opcode to measure, by name. - Either `opcode-index` or `opcode-name` must be set. + Specify the opcode to measure, by name. See example 1 for details. + Exactly one of `opcode-index`, `opcode-name` or `snippets-file` must be set. + + .. option:: -snippets-file=<filename> + + Specify the custom code snippet to measure. See example 2 for details. + Exactly one of `opcode-index`, `opcode-name` or `snippets-file` must be set. .. option:: -mode=[latency|uops|analysis] Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -31,12 +31,12 @@ BenchmarkRunner::~BenchmarkRunner() = default; -// Repeat the snippet until there are at least NumInstructions in the resulting +// Repeat the snippet until there are at least MinInstructions in the resulting // code. 
static std::vector -GenerateInstructions(const BenchmarkCode &BC, const int MinInstructions) { +GenerateInstructions(const BenchmarkCode &BC, const size_t MinInstructions) { std::vector Code = BC.Instructions; - for (int I = 0; I < MinInstructions; ++I) + for (int I = 0; Code.size() < MinInstructions; ++I) Code.push_back(BC.Instructions[I % BC.Instructions.size()]); return Code; } Index: tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- tools/llvm-exegesis/llvm-exegesis.cpp +++ tools/llvm-exegesis/llvm-exegesis.cpp @@ -22,11 +22,17 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include @@ -42,6 +48,10 @@ OpcodeName("opcode-name", llvm::cl::desc("opcode to measure, by name"), llvm::cl::init("")); +static llvm::cl::opt + SnippetsFile("snippets-file", llvm::cl::desc("code snippets to measure"), + llvm::cl::init("")); + static llvm::cl::opt BenchmarkFile("benchmarks-file", llvm::cl::desc(""), llvm::cl::init("")); @@ -91,10 +101,19 @@ void LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET(); #endif -static unsigned GetOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) { - if (OpcodeName.empty() && (OpcodeIndex == 0)) +// Checks that only one of OpcodeName, OpcodeIndex or SnippetsFile is provided, +// and returns the opcode index or 0 if snippets should be read from +// `SnippetsFile`. 
+static unsigned getOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) { + const size_t NumSetFlags = (OpcodeName.empty() ? 0 : 1) + + (OpcodeIndex == 0 ? 0 : 1) + + (SnippetsFile.empty() ? 0 : 1); + if (NumSetFlags != 1) llvm::report_fatal_error( - "please provide one and only one of 'opcode-index' or 'opcode-name'"); + "please provide one and only one of 'opcode-index', 'opcode-name' or " + "'snippets-file'"); + if (!SnippetsFile.empty()) + return 0; if (OpcodeIndex > 0) return OpcodeIndex; // Resolve opcode name -> opcode. @@ -120,14 +139,12 @@ } // Generates code snippets for opcode `Opcode`. -llvm::Expected> -generateSnippets(const LLVMState &State, unsigned Opcode, - unsigned NumRepetitions) { +static llvm::Expected> +generateSnippets(const LLVMState &State, unsigned Opcode) { const std::unique_ptr Generator = State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State); - if (!Generator) { + if (!Generator) llvm::report_fatal_error("cannot create snippet generator"); - } const llvm::MCInstrDesc &InstrDesc = State.getInstrInfo().get(Opcode); // Ignore instructions that we cannot run. @@ -143,31 +160,148 @@ return Generator->generateConfigurations(Opcode); } +namespace { + +// An MCStreamer that reads a BenchmarkCode definition from a file. +// The BenchmarkCode definition is just an asm file, with additional comments to +// specify which registers should be defined or are live on entry. +class BenchmarkCodeStreamer : public llvm::MCStreamer, + public llvm::AsmCommentConsumer { +public: + explicit BenchmarkCodeStreamer(llvm::MCContext *Context, + const llvm::MCRegisterInfo *TheRegInfo, + BenchmarkCode *Result) + : llvm::MCStreamer(*Context), RegInfo(TheRegInfo), Result(Result) {} + + // Implementation of the llvm::MCStreamer interface. We only care about + // instructions. 
+ void EmitInstruction(const llvm::MCInst &instruction, + const llvm::MCSubtargetInfo &mc_subtarget_info, + bool PrintSchedInfo) override { + Result->Instructions.push_back(instruction); + } + + // Implementation of the llvm::AsmCommentConsumer. + void HandleComment(llvm::SMLoc Loc, llvm::StringRef CommentText) override { + CommentText = CommentText.trim(); + if (!CommentText.consume_front("LLVM-EXEGESIS-")) + return; + if (CommentText.consume_front("DEFREG")) { + if (unsigned Reg = findRegisterByName(CommentText.ltrim())) + Result->RegsToDef.push_back(Reg); + return; + } + if (CommentText.consume_front("LIVEIN")) { + if (unsigned Reg = findRegisterByName(CommentText.ltrim())) + Result->LiveIns.push_back(Reg); + return; + } + } + +private: + // We only care about instructions, we don't implement this part of the API. + void EmitCommonSymbol(llvm::MCSymbol *symbol, uint64_t size, + unsigned byte_alignment) override {} + bool EmitSymbolAttribute(llvm::MCSymbol *symbol, + llvm::MCSymbolAttr attribute) override { + return false; + } + void EmitValueToAlignment(unsigned byte_alignment, int64_t value, + unsigned value_size, + unsigned max_bytes_to_emit) override {} + void EmitZerofill(llvm::MCSection *section, llvm::MCSymbol *symbol, + uint64_t size, unsigned byte_alignment, + llvm::SMLoc Loc) override {} + + unsigned findRegisterByName(const llvm::StringRef RegName) const { + // FIXME: Can we do better than this ? + for (unsigned I = 0, E = RegInfo->getNumRegs(); I < E; ++I) { + if (RegName == RegInfo->getName(I)) + return I; + } + llvm::errs() << "'" << RegName + << "' is not a valid register name for the target\n"; + return 0; + } + + const llvm::MCRegisterInfo *const RegInfo; + BenchmarkCode *const Result; +}; + +} // namespace + +// Reads code snippets from file `Filename`. 
+static llvm::Expected> +readSnippets(const LLVMState &State, llvm::StringRef Filename) { + llvm::ErrorOr> BufferPtr = + llvm::MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BufferPtr.getError()) { + return llvm::make_error( + "cannot read snippet: " + Filename + ": " + EC.message()); + } + llvm::SourceMgr SM; + SM.AddNewSourceBuffer(std::move(BufferPtr.get()), llvm::SMLoc()); + + BenchmarkCode Result; + + llvm::MCObjectFileInfo ObjectFileInfo; + const llvm::TargetMachine &TM = State.getTargetMachine(); + llvm::MCContext Context(TM.getMCAsmInfo(), TM.getMCRegisterInfo(), + &ObjectFileInfo); + ObjectFileInfo.InitMCObjectFileInfo(TM.getTargetTriple(), /*PIC*/ false, + Context); + BenchmarkCodeStreamer Streamer(&Context, TM.getMCRegisterInfo(), &Result); + const std::unique_ptr AsmParser( + llvm::createMCAsmParser(SM, Context, Streamer, *TM.getMCAsmInfo())); + if (!AsmParser) + return llvm::make_error("cannot create asm parser"); + // Use default dialect. FIXME: allow changing the dialect. 
+ AsmParser->setAssemblerDialect(0); + AsmParser->getLexer().setCommentConsumer(&Streamer); + + const std::unique_ptr TargetAsmParser( + TM.getTarget().createMCAsmParser(*TM.getMCSubtargetInfo(), *AsmParser, + *TM.getMCInstrInfo(), + llvm::MCTargetOptions())); + + if (!TargetAsmParser) + return llvm::make_error( + "cannot create target asm parser"); + AsmParser->setTargetParser(*TargetAsmParser); + + if (AsmParser->Run(false)) + return llvm::make_error("cannot parse asm file"); + return std::vector{std::move(Result)}; +} + void benchmarkMain() { if (exegesis::pfm::pfmInitialize()) llvm::report_fatal_error("cannot initialize libpfm"); llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetAsmParser(); #ifdef LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET(); #endif const LLVMState State; - const auto Opcode = GetOpcodeOrDie(State.getInstrInfo()); - - // Ignore instructions without a sched class if -ignore-invalid-sched-class is - // passed. - if (IgnoreInvalidSchedClass && - State.getInstrInfo().get(Opcode).getSchedClass() == 0) { - llvm::errs() << "ignoring instruction without sched class\n"; - return; + const auto Opcode = getOpcodeOrDie(State.getInstrInfo()); + + std::vector Configurations; + if (Opcode > 0) { + // Ignore instructions without a sched class if -ignore-invalid-sched-class + // is passed. + if (IgnoreInvalidSchedClass && + State.getInstrInfo().get(Opcode).getSchedClass() == 0) { + llvm::errs() << "ignoring instruction without sched class\n"; + return; + } + Configurations = ExitOnErr(generateSnippets(State, Opcode)); + } else { + Configurations = ExitOnErr(readSnippets(State, SnippetsFile)); } - // FIXME: Allow arbitrary code. - const std::vector Configurations = - ExitOnErr(generateSnippets(State, Opcode, NumRepetitions)); - const std::unique_ptr Runner = State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State); if (!Runner) {