Index: docs/CommandGuide/llvm-exegesis.rst =================================================================== --- docs/CommandGuide/llvm-exegesis.rst +++ docs/CommandGuide/llvm-exegesis.rst @@ -24,8 +24,11 @@ The main goal of this tool is to automatically (in)validate the LLVM's TableDef scheduling models. To that end, we also provide analysis of the results. -EXAMPLES: benchmarking ----------------------- +:program:`llvm-exegesis` can also benchmark arbitrary user-provided code +snippets. + +EXAMPLE 1: benchmarking instructions +------------------------------------ Assume you have an X86-64 machine. To measure the latency of a single instruction, run: @@ -75,8 +78,44 @@ FIXME: Provide an :program:`llvm-exegesis` option to test all instructions. -EXAMPLES: analysis ----------------------- + +EXAMPLE 2: benchmarking a custom code snippet +--------------------------------------------- + +To measure the latency/uops of a custom piece of code, you can specify the +`snippets-file` option (`-` reads from standard input). + +.. code-block:: bash + + $ echo "vzeroupper" | llvm-exegesis -mode=uops -snippets-file=- + +Real-life code snippets typically depend on registers or memory. +:program:`llvm-exegesis` checks the liveness of registers (i.e. any register +use has a corresponding def or is a "live in"). If your code depends on the +value of some registers, you have two options: + - Mark the register as requiring a definition. :program:`llvm-exegesis` will + automatically assign a value to the register. This can be done using the + directive `LLVM-EXEGESIS-DEFREG <reg name> <hex_value>`, where `<hex_value>` + is a bit pattern used to fill `<reg name>`. If `<hex_value>` is smaller than + the register width, it will be sign-extended. + - Mark the register as a "live in". :program:`llvm-exegesis` will benchmark + using whatever value was in this register on entry. This can be done using + the directive `LLVM-EXEGESIS-LIVEIN <reg name>`. 
+ +For example, the following code snippet depends on the values of XMM1 (which +will be set by the tool) and the memory buffer passed in RDI (live in). + +.. code-block:: none + + # LLVM-EXEGESIS-LIVEIN RDI + # LLVM-EXEGESIS-DEFREG XMM1 42 + vmulps (%rdi), %xmm1, %xmm2 + vhaddps %xmm2, %xmm2, %xmm3 + addq $0x10, %rdi + + +EXAMPLE 3: analysis +------------------- Assuming you have a set of benchmarked instructions (either latency or uops) as YAML in file `/tmp/benchmarks.yaml`, you can analyze the results using the @@ -132,13 +171,18 @@ .. option:: -opcode-index= - Specify the opcode to measure, by index. - Either `opcode-index` or `opcode-name` must be set. + Specify the opcode to measure, by index. See example 1 for details. + Exactly one of `opcode-index`, `opcode-name` or `snippets-file` must be set. .. option:: -opcode-name= - Specify the opcode to measure, by name. - Either `opcode-index` or `opcode-name` must be set. + Specify the opcode to measure, by name. See example 1 for details. + Exactly one of `opcode-index`, `opcode-name` or `snippets-file` must be set. + + .. option:: -snippets-file=<filename> + + Specify the custom code snippet to measure. See example 2 for details. + Exactly one of `opcode-index`, `opcode-name` or `snippets-file` must be set. .. option:: -mode=[latency|uops|analysis] Index: tools/llvm-exegesis/lib/Assembler.h =================================================================== --- tools/llvm-exegesis/lib/Assembler.h +++ tools/llvm-exegesis/lib/Assembler.h @@ -30,6 +30,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "BenchmarkCode.h" namespace exegesis { @@ -39,12 +40,6 @@ // convention and target machine). llvm::BitVector getFunctionReservedRegs(const llvm::TargetMachine &TM); -// A simple object storing the value for a particular register. 
-struct RegisterValue { - unsigned Register; - llvm::APInt Value; -}; - // Creates a temporary `void foo(char*)` function containing the provided // Instructions. Runs a set of llvm Passes to provide correct prologue and // epilogue. Once the MachineFunction is ready, it is assembled for TM to Index: tools/llvm-exegesis/lib/BenchmarkCode.h =================================================================== --- tools/llvm-exegesis/lib/BenchmarkCode.h +++ tools/llvm-exegesis/lib/BenchmarkCode.h @@ -16,6 +16,12 @@ namespace exegesis { +// A simple object storing the value for a particular register. +struct RegisterValue { + unsigned Register; + llvm::APInt Value; +}; + // A collection of instructions that are to be assembled, executed and measured. struct BenchmarkCode { // The sequence of instructions that are to be repeated. Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -31,12 +31,12 @@ BenchmarkRunner::~BenchmarkRunner() = default; -// Repeat the snippet until there are at least NumInstructions in the resulting +// Repeat the snippet until there are at least MinInstructions in the resulting // code. 
static std::vector -GenerateInstructions(const BenchmarkCode &BC, const int MinInstructions) { +GenerateInstructions(const BenchmarkCode &BC, const size_t MinInstructions) { std::vector Code = BC.Instructions; - for (int I = 0; I < MinInstructions; ++I) + for (int I = 0; Code.size() < MinInstructions; ++I) Code.push_back(BC.Instructions[I % BC.Instructions.size()]); return Code; } Index: tools/llvm-exegesis/lib/X86/Target.cpp =================================================================== --- tools/llvm-exegesis/lib/X86/Target.cpp +++ tools/llvm-exegesis/lib/X86/Target.cpp @@ -170,40 +170,23 @@ // Reserves some space on the stack, fills it with the content of the provided // constant and provide methods to load the stack value into a register. struct ConstantInliner { - explicit ConstantInliner(const llvm::APInt &Constant) - : StackSize(Constant.getBitWidth() / 8) { - assert(Constant.getBitWidth() % 8 == 0 && "Must be a multiple of 8"); - add(allocateStackSpace(StackSize)); - size_t ByteOffset = 0; - for (; StackSize - ByteOffset >= 4; ByteOffset += 4) - add(fillStackSpace( - llvm::X86::MOV32mi, ByteOffset, - Constant.extractBits(32, ByteOffset * 8).getZExtValue())); - if (StackSize - ByteOffset >= 2) { - add(fillStackSpace( - llvm::X86::MOV16mi, ByteOffset, - Constant.extractBits(16, ByteOffset * 8).getZExtValue())); - ByteOffset += 2; - } - if (StackSize - ByteOffset >= 1) - add(fillStackSpace( - llvm::X86::MOV8mi, ByteOffset, - Constant.extractBits(8, ByteOffset * 8).getZExtValue())); - } + explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {} std::vector loadAndFinalize(unsigned Reg, unsigned RegBitWidth, unsigned Opcode) { - assert(StackSize * 8 == RegBitWidth && - "Value does not have the correct size"); + assert((RegBitWidth & 7) == 0 && + "RegBitWidth must be a multiple of 8 bits"); + initStack(RegBitWidth / 8); add(loadToReg(Reg, Opcode)); - add(releaseStackSpace(StackSize)); + add(releaseStackSpace(RegBitWidth / 8)); return 
std::move(Instructions); } std::vector loadX87AndFinalize(unsigned Reg, unsigned RegBitWidth, unsigned Opcode) { - assert(StackSize * 8 == RegBitWidth && - "Value does not have the correct size"); + assert((RegBitWidth & 7) == 0 && + "RegBitWidth must be a multiple of 8 bits"); + initStack(RegBitWidth / 8); add(llvm::MCInstBuilder(Opcode) .addReg(llvm::X86::RSP) // BaseReg .addImm(1) // ScaleAmt @@ -212,12 +195,11 @@ .addReg(0)); // Segment if (Reg != llvm::X86::ST0) add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg)); - add(releaseStackSpace(StackSize)); + add(releaseStackSpace(RegBitWidth / 8)); return std::move(Instructions); } std::vector popFlagAndFinalize() { - assert(StackSize * 8 == 64 && "Value does not have the correct size"); add(llvm::MCInstBuilder(llvm::X86::POPF64)); return std::move(Instructions); } @@ -228,7 +210,29 @@ return *this; } - const size_t StackSize; + void initStack(unsigned Bytes) { + assert(Constant_.getBitWidth() <= Bytes * 8 && + "Value does not have the correct size"); + const llvm::APInt WideConstant = Constant_.sext(Bytes * 8); + add(allocateStackSpace(Bytes)); + size_t ByteOffset = 0; + for (; Bytes - ByteOffset >= 4; ByteOffset += 4) + add(fillStackSpace( + llvm::X86::MOV32mi, ByteOffset, + WideConstant.extractBits(32, ByteOffset * 8).getZExtValue())); + if (Bytes - ByteOffset >= 2) { + add(fillStackSpace( + llvm::X86::MOV16mi, ByteOffset, + WideConstant.extractBits(16, ByteOffset * 8).getZExtValue())); + ByteOffset += 2; + } + if (Bytes - ByteOffset >= 1) + add(fillStackSpace( + llvm::X86::MOV8mi, ByteOffset, + WideConstant.extractBits(8, ByteOffset * 8).getZExtValue())); + } + + llvm::APInt Constant_; std::vector Instructions; }; Index: tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- tools/llvm-exegesis/llvm-exegesis.cpp +++ tools/llvm-exegesis/llvm-exegesis.cpp @@ -22,11 +22,17 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include 
"llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include @@ -42,6 +48,10 @@ OpcodeName("opcode-name", llvm::cl::desc("opcode to measure, by name"), llvm::cl::init("")); +static llvm::cl::opt + SnippetsFile("snippets-file", llvm::cl::desc("code snippets to measure"), + llvm::cl::init("")); + static llvm::cl::opt BenchmarkFile("benchmarks-file", llvm::cl::desc(""), llvm::cl::init("")); @@ -91,10 +101,19 @@ void LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET(); #endif -static unsigned GetOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) { - if (OpcodeName.empty() && (OpcodeIndex == 0)) +// Checks that only one of OpcodeName, OpcodeIndex or SnippetsFile is provided, +// and returns the opcode index or 0 if snippets should be read from +// `SnippetsFile`. +static unsigned getOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) { + const size_t NumSetFlags = (OpcodeName.empty() ? 0 : 1) + + (OpcodeIndex == 0 ? 0 : 1) + + (SnippetsFile.empty() ? 0 : 1); + if (NumSetFlags != 1) llvm::report_fatal_error( - "please provide one and only one of 'opcode-index' or 'opcode-name'"); + "please provide one and only one of 'opcode-index', 'opcode-name' or " + "'snippets-file'"); + if (!SnippetsFile.empty()) + return 0; if (OpcodeIndex > 0) return OpcodeIndex; // Resolve opcode name -> opcode. @@ -120,13 +139,12 @@ } // Generates code snippets for opcode `Opcode`. 
-llvm::Expected> +static llvm::Expected> generateSnippets(const LLVMState &State, unsigned Opcode) { const std::unique_ptr Generator = State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State); - if (!Generator) { + if (!Generator) llvm::report_fatal_error("cannot create snippet generator"); - } const llvm::MCInstrDesc &InstrDesc = State.getInstrInfo().get(Opcode); // Ignore instructions that we cannot run. @@ -142,31 +160,156 @@ return Generator->generateConfigurations(Opcode); } +namespace { + +// An MCStreamer that reads a BenchmarkCode definition from a file. +// The BenchmarkCode definition is just an asm file, with additional comments to +// specify which registers should be defined or are live on entry. +class BenchmarkCodeStreamer : public llvm::MCStreamer, + public llvm::AsmCommentConsumer { +public: + explicit BenchmarkCodeStreamer(llvm::MCContext *Context, + const llvm::MCRegisterInfo *TheRegInfo, + BenchmarkCode *Result) + : llvm::MCStreamer(*Context), RegInfo(TheRegInfo), Result(Result) {} + + // Implementation of the llvm::MCStreamer interface. We only care about + // instructions. + void EmitInstruction(const llvm::MCInst &instruction, + const llvm::MCSubtargetInfo &mc_subtarget_info, + bool PrintSchedInfo) override { + Result->Instructions.push_back(instruction); + } + + // Implementation of the llvm::AsmCommentConsumer. 
+ void HandleComment(llvm::SMLoc Loc, llvm::StringRef CommentText) override { + CommentText = CommentText.trim(); + if (!CommentText.consume_front("LLVM-EXEGESIS-")) + return; + if (CommentText.consume_front("DEFREG")) { + RegisterValue RegVal; + llvm::SmallVector Parts; + CommentText.split(Parts, ' ', -1, false); + if (Parts.size() != 2 || !(RegVal.Register = findRegisterByName(Parts[0].trim()))) { + llvm::errs() << "Ignoring invalid comment 'LLVM-EXEGESIS-DEFREG " << CommentText << "\n"; + return; + } + const llvm::StringRef Value = Parts[1].trim(); + RegVal.Value = llvm::APInt(Value.size() * 4, Value, 16); + Result->RegisterInitialValues.push_back(std::move(RegVal)); + return; + } + if (CommentText.consume_front("LIVEIN")) { + if (unsigned Reg = findRegisterByName(CommentText.ltrim())) + Result->LiveIns.push_back(Reg); + return; + } + } + +private: + // We only care about instructions, we don't implement this part of the API. + void EmitCommonSymbol(llvm::MCSymbol *symbol, uint64_t size, + unsigned byte_alignment) override {} + bool EmitSymbolAttribute(llvm::MCSymbol *symbol, + llvm::MCSymbolAttr attribute) override { + return false; + } + void EmitValueToAlignment(unsigned byte_alignment, int64_t value, + unsigned value_size, + unsigned max_bytes_to_emit) override {} + void EmitZerofill(llvm::MCSection *section, llvm::MCSymbol *symbol, + uint64_t size, unsigned byte_alignment, + llvm::SMLoc Loc) override {} + + unsigned findRegisterByName(const llvm::StringRef RegName) const { + // FIXME: Can we do better than this ? + for (unsigned I = 0, E = RegInfo->getNumRegs(); I < E; ++I) { + if (RegName == RegInfo->getName(I)) + return I; + } + llvm::errs() << "'" << RegName + << "' is not a valid register name for the target\n"; + return 0; + } + + const llvm::MCRegisterInfo *const RegInfo; + BenchmarkCode *const Result; +}; + +} // namespace + +// Reads code snippets from file `Filename`. 
+static llvm::Expected> +readSnippets(const LLVMState &State, llvm::StringRef Filename) { + llvm::ErrorOr> BufferPtr = + llvm::MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BufferPtr.getError()) { + return llvm::make_error( + "cannot read snippet: " + Filename + ": " + EC.message()); + } + llvm::SourceMgr SM; + SM.AddNewSourceBuffer(std::move(BufferPtr.get()), llvm::SMLoc()); + + BenchmarkCode Result; + + llvm::MCObjectFileInfo ObjectFileInfo; + const llvm::TargetMachine &TM = State.getTargetMachine(); + llvm::MCContext Context(TM.getMCAsmInfo(), TM.getMCRegisterInfo(), + &ObjectFileInfo); + ObjectFileInfo.InitMCObjectFileInfo(TM.getTargetTriple(), /*PIC*/ false, + Context); + BenchmarkCodeStreamer Streamer(&Context, TM.getMCRegisterInfo(), &Result); + const std::unique_ptr AsmParser( + llvm::createMCAsmParser(SM, Context, Streamer, *TM.getMCAsmInfo())); + if (!AsmParser) + return llvm::make_error("cannot create asm parser"); + // Use default dialect. FIXME: allow changing the dialect. 
+ AsmParser->setAssemblerDialect(0); + AsmParser->getLexer().setCommentConsumer(&Streamer); + + const std::unique_ptr TargetAsmParser( + TM.getTarget().createMCAsmParser(*TM.getMCSubtargetInfo(), *AsmParser, + *TM.getMCInstrInfo(), + llvm::MCTargetOptions())); + + if (!TargetAsmParser) + return llvm::make_error( + "cannot create target asm parser"); + AsmParser->setTargetParser(*TargetAsmParser); + + if (AsmParser->Run(false)) + return llvm::make_error("cannot parse asm file"); + return std::vector{std::move(Result)}; +} + void benchmarkMain() { if (exegesis::pfm::pfmInitialize()) llvm::report_fatal_error("cannot initialize libpfm"); llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetAsmParser(); #ifdef LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET(); #endif const LLVMState State; - const auto Opcode = GetOpcodeOrDie(State.getInstrInfo()); - - // Ignore instructions without a sched class if -ignore-invalid-sched-class is - // passed. - if (IgnoreInvalidSchedClass && - State.getInstrInfo().get(Opcode).getSchedClass() == 0) { - llvm::errs() << "ignoring instruction without sched class\n"; - return; + const auto Opcode = getOpcodeOrDie(State.getInstrInfo()); + + std::vector Configurations; + if (Opcode > 0) { + // Ignore instructions without a sched class if -ignore-invalid-sched-class + // is passed. + if (IgnoreInvalidSchedClass && + State.getInstrInfo().get(Opcode).getSchedClass() == 0) { + llvm::errs() << "ignoring instruction without sched class\n"; + return; + } + Configurations = ExitOnErr(generateSnippets(State, Opcode)); + } else { + Configurations = ExitOnErr(readSnippets(State, SnippetsFile)); } - // FIXME: Allow arbitrary code. - const std::vector Configurations = - ExitOnErr(generateSnippets(State, Opcode)); - const std::unique_ptr Runner = State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State); if (!Runner) {