Index: tools/llvm-exegesis/lib/BenchmarkCode.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/BenchmarkCode.h @@ -0,0 +1,38 @@ +//===-- BenchmarkCode.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H +#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H + +#include "llvm/MC/MCInst.h" +#include +#include + +namespace exegesis { + +// A collection of instructions that are to be assembled, executed and measured. +struct BenchmarkCode { + // The sequence of instructions that are to be repeated. + std::vector Instructions; + + // Before the code is executed some instructions are added to set up the + // registers' initial values. + std::vector RegsToDef; + + // We also need to provide the registers that are live on entry for the + // assembler to generate proper prologue/epilogue. + std::vector LiveIns; + + // Information about how this configuration was built. + std::string Info; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H Index: tools/llvm-exegesis/lib/BenchmarkRunner.h =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.h +++ tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -17,10 +17,10 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H #include "Assembler.h" +#include "BenchmarkCode.h" #include "BenchmarkResult.h" #include "LlvmState.h" #include "MCInstrDescView.h" -#include "RegisterAliasing.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Error.h" #include @@ -36,23 +36,6 @@ BenchmarkFailure(const llvm::Twine &S); }; -// A collection of instructions that are to be assembled, executed and measured. 
-struct BenchmarkCode { - // The sequence of instructions that are to be repeated. - std::vector Instructions; - - // Before the code is executed some instructions are added to setup the - // registers initial values. - std::vector RegsToDef; - - // We also need to provide the registers that are live on entry for the - // assembler to generate proper prologue/epilogue. - std::vector LiveIns; - - // Informations about how this configuration was built. - std::string Info; -}; - // Common code for all benchmark modes. class BenchmarkRunner { public: @@ -61,12 +44,8 @@ virtual ~BenchmarkRunner(); - llvm::Expected> - run(unsigned Opcode, unsigned NumRepetitions); - - // Given a snippet, computes which registers the setup code needs to define. - std::vector - computeRegsToDef(const std::vector &Snippet) const; + InstructionBenchmark runConfiguration(const BenchmarkCode &Configuration, + unsigned NumRepetitions) const; // Scratch space to run instructions that touch memory. struct ScratchSpace { @@ -87,33 +66,12 @@ protected: const LLVMState &State; - const RegisterAliasingTrackerCache RATC; - - // Generates a single code template that has a self-dependency. - llvm::Expected - generateSelfAliasingCodeTemplate(const Instruction &Instr) const; - // Generates a single code template without assignment constraints. - llvm::Expected - generateUnconstrainedCodeTemplate(const Instruction &Instr, - llvm::StringRef Msg) const; private: - // API to be implemented by subclasses. - virtual llvm::Expected - generateCodeTemplate(unsigned Opcode) const = 0; - virtual std::vector runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, const unsigned NumRepetitions) const = 0; - // Internal helpers. - InstructionBenchmark runConfiguration(const BenchmarkCode &Configuration, - unsigned NumRepetitions) const; - - // Calls generateCodeTemplate and expands it into one or more BenchmarkCode. 
- llvm::Expected> - generateConfigurations(unsigned Opcode) const; - llvm::Expected writeObjectFile(const BenchmarkCode &Configuration, llvm::ArrayRef Code) const; Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Program.h" @@ -28,37 +27,10 @@ BenchmarkRunner::BenchmarkRunner(const LLVMState &State, InstructionBenchmark::ModeE Mode) - : State(State), RATC(State.getRegInfo(), - getFunctionReservedRegs(State.getTargetMachine())), - Mode(Mode), Scratch(llvm::make_unique()) {} + : State(State), Mode(Mode), Scratch(llvm::make_unique()) {} BenchmarkRunner::~BenchmarkRunner() = default; -llvm::Expected> -BenchmarkRunner::run(unsigned Opcode, unsigned NumRepetitions) { - const llvm::MCInstrDesc &InstrDesc = State.getInstrInfo().get(Opcode); - // Ignore instructions that we cannot run. - if (InstrDesc.isPseudo()) - return llvm::make_error("Unsupported opcode: isPseudo"); - if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) - return llvm::make_error( - "Unsupported opcode: isBranch/isIndirectBranch"); - if (InstrDesc.isCall() || InstrDesc.isReturn()) - return llvm::make_error( - "Unsupported opcode: isCall/isReturn"); - - llvm::Expected> ConfigurationOrError = - generateConfigurations(Opcode); - - if (llvm::Error E = ConfigurationOrError.takeError()) - return std::move(E); - - std::vector InstrBenchmarks; - for (const BenchmarkCode &Conf : ConfigurationOrError.get()) - InstrBenchmarks.push_back(runConfiguration(Conf, NumRepetitions)); - return InstrBenchmarks; -} - // Repeat the snippet until there are at least NumInstructions in the resulting // code. 
static std::vector @@ -122,74 +94,6 @@ return InstrBenchmark; } -llvm::Expected> -BenchmarkRunner::generateConfigurations(unsigned Opcode) const { - if (auto E = generateCodeTemplate(Opcode)) { - CodeTemplate &CT = E.get(); - std::vector Output; - // TODO: Generate as many BenchmarkCode as needed. - { - BenchmarkCode BC; - BC.Info = CT.Info; - for (InstructionBuilder &IB : CT.Instructions) { - IB.randomizeUnsetVariables( - CT.ScratchSpacePointerInReg - ? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits() - : RATC.emptyRegisters()); - BC.Instructions.push_back(IB.build()); - } - if (CT.ScratchSpacePointerInReg) - BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); - BC.RegsToDef = computeRegsToDef(CT.Instructions); - Output.push_back(std::move(BC)); - } - return Output; - } else - return E.takeError(); -} - -std::vector BenchmarkRunner::computeRegsToDef( - const std::vector &Instructions) const { - // Collect all register uses and create an assignment for each of them. - // Ignore memory operands which are handled separately. - // Loop invariant: DefinedRegs[i] is true iif it has been set at least once - // before the current instruction. - llvm::BitVector DefinedRegs = RATC.emptyRegisters(); - std::vector RegsToDef; - for (const InstructionBuilder &IB : Instructions) { - // Returns the register that this Operand sets or uses, or 0 if this is not - // a register. - const auto GetOpReg = [&IB](const Operand &Op) -> unsigned { - if (Op.IsMem) - return 0; - if (Op.ImplicitReg) - return *Op.ImplicitReg; - if (Op.IsExplicit && IB.getValueFor(Op).isReg()) - return IB.getValueFor(Op).getReg(); - return 0; - }; - // Collect used registers that have never been def'ed. - for (const Operand &Op : IB.Instr.Operands) { - if (!Op.IsDef) { - const unsigned Reg = GetOpReg(Op); - if (Reg > 0 && !DefinedRegs.test(Reg)) { - RegsToDef.push_back(Reg); - DefinedRegs.set(Reg); - } - } - } - // Mark defs as having been def'ed. 
- for (const Operand &Op : IB.Instr.Operands) { - if (Op.IsDef) { - const unsigned Reg = GetOpReg(Op); - if (Reg > 0) - DefinedRegs.set(Reg); - } - } - } - return RegsToDef; -} - llvm::Expected BenchmarkRunner::writeObjectFile(const BenchmarkCode &BC, llvm::ArrayRef Code) const { @@ -204,32 +108,4 @@ return ResultPath.str(); } -llvm::Expected BenchmarkRunner::generateSelfAliasingCodeTemplate( - const Instruction &Instr) const { - const AliasingConfigurations SelfAliasing(Instr, Instr); - if (SelfAliasing.empty()) { - return llvm::make_error("empty self aliasing"); - } - CodeTemplate CT; - InstructionBuilder IB(Instr); - if (SelfAliasing.hasImplicitAliasing()) { - CT.Info = "implicit Self cycles, picking random values."; - } else { - CT.Info = "explicit self cycles, selecting one aliasing Conf."; - // This is a self aliasing instruction so defs and uses are from the same - // instance, hence twice IB in the following call. - setRandomAliasing(SelfAliasing, IB, IB); - } - CT.Instructions.push_back(std::move(IB)); - return std::move(CT); -} - -llvm::Expected -BenchmarkRunner::generateUnconstrainedCodeTemplate(const Instruction &Instr, - llvm::StringRef Msg) const { - CodeTemplate CT; - CT.Info = llvm::formatv("{0}, repeating an unconstrained assignment", Msg); - CT.Instructions.emplace_back(Instr); - return std::move(CT); -} } // namespace exegesis Index: tools/llvm-exegesis/lib/CMakeLists.txt =================================================================== --- tools/llvm-exegesis/lib/CMakeLists.txt +++ tools/llvm-exegesis/lib/CMakeLists.txt @@ -19,6 +19,7 @@ MCInstrDescView.cpp PerfHelper.cpp RegisterAliasing.cpp + SnippetGenerator.cpp Target.cpp Uops.cpp ) Index: tools/llvm-exegesis/lib/Latency.h =================================================================== --- tools/llvm-exegesis/lib/Latency.h +++ tools/llvm-exegesis/lib/Latency.h @@ -17,14 +17,14 @@ #include "BenchmarkRunner.h" #include "MCInstrDescView.h" +#include "SnippetGenerator.h" namespace exegesis 
{ -class LatencyBenchmarkRunner : public BenchmarkRunner { +class LatencySnippetGenerator : public SnippetGenerator { public: - LatencyBenchmarkRunner(const LLVMState &State) - : BenchmarkRunner(State, InstructionBenchmark::Latency) {} - ~LatencyBenchmarkRunner() override; + LatencySnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + ~LatencySnippetGenerator() override; llvm::Expected generateCodeTemplate(unsigned Opcode) const override; @@ -34,14 +34,21 @@ llvm::Expected generateTwoInstructionPrototype(const Instruction &Instr) const; +}; +class LatencyBenchmarkRunner : public BenchmarkRunner { +public: + LatencyBenchmarkRunner(const LLVMState &State) + : BenchmarkRunner(State, InstructionBenchmark::Latency) {} + ~LatencyBenchmarkRunner() override; + +private: std::vector runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, const unsigned NumRepetitions) const override; virtual const char *getCounterName() const; }; - } // namespace exegesis #endif // LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -29,9 +29,9 @@ return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY; } -LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; +LatencySnippetGenerator::~LatencySnippetGenerator() = default; -llvm::Error LatencyBenchmarkRunner::isInfeasible( +llvm::Error LatencySnippetGenerator::isInfeasible( const llvm::MCInstrDesc &MCInstrDesc) const { if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand)) return llvm::make_error( @@ -43,7 +43,7 @@ } llvm::Expected -LatencyBenchmarkRunner::generateTwoInstructionPrototype( +LatencySnippetGenerator::generateTwoInstructionPrototype( const Instruction &Instr) const { std::vector Opcodes; Opcodes.resize(State.getInstrInfo().getNumOpcodes()); @@ -80,7 +80,7 @@ } llvm::Expected 
-LatencyBenchmarkRunner::generateCodeTemplate(unsigned Opcode) const { +LatencySnippetGenerator::generateCodeTemplate(unsigned Opcode) const { const auto &InstrDesc = State.getInstrInfo().get(Opcode); if (auto E = isInfeasible(InstrDesc)) return std::move(E); @@ -105,6 +105,8 @@ return CounterName; } +LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; + std::vector LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, ScratchSpace &Scratch, Index: tools/llvm-exegesis/lib/SnippetGenerator.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/SnippetGenerator.h @@ -0,0 +1,74 @@ +//===-- SnippetGenerator.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the abstract SnippetGenerator class for generating code that allows +/// measuring a certain property of instructions (e.g. latency). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H + +#include "Assembler.h" +#include "BenchmarkCode.h" +#include "LlvmState.h" +#include "MCInstrDescView.h" +#include "RegisterAliasing.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Error.h" +#include +#include +#include + +namespace exegesis { + +// A class representing failures that happened during snippet generation; they +// are used to report information to the user. +class SnippetGeneratorFailure : public llvm::StringError { +public: + SnippetGeneratorFailure(const llvm::Twine &S); +}; + +// Common snippet-generation code for all benchmark modes. 
+class SnippetGenerator { +public: + explicit SnippetGenerator(const LLVMState &State); + + virtual ~SnippetGenerator(); + + // Calls generateCodeTemplate and expands it into one or more BenchmarkCode. + llvm::Expected> + generateConfigurations(unsigned Opcode) const; + + // Given a snippet, computes which registers the setup code needs to define. + std::vector + computeRegsToDef(const std::vector &Snippet) const; + +protected: + const LLVMState &State; + const RegisterAliasingTrackerCache RATC; + + // Generates a single code template that has a self-dependency. + llvm::Expected + generateSelfAliasingCodeTemplate(const Instruction &Instr) const; + // Generates a single code template without assignment constraints. + llvm::Expected + generateUnconstrainedCodeTemplate(const Instruction &Instr, + llvm::StringRef Msg) const; + +private: + // API to be implemented by subclasses. + virtual llvm::Expected + generateCodeTemplate(unsigned Opcode) const = 0; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H Index: tools/llvm-exegesis/lib/SnippetGenerator.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -0,0 +1,130 @@ +//===-- SnippetGenerator.cpp ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "Assembler.h" +#include "MCInstrDescView.h" +#include "SnippetGenerator.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Program.h" + +namespace exegesis { + +SnippetGeneratorFailure::SnippetGeneratorFailure(const llvm::Twine &S) + : llvm::StringError(S, llvm::inconvertibleErrorCode()) {} + +SnippetGenerator::SnippetGenerator(const LLVMState &State) + : State(State), RATC(State.getRegInfo(), + getFunctionReservedRegs(State.getTargetMachine())) {} + +SnippetGenerator::~SnippetGenerator() = default; + +llvm::Expected> +SnippetGenerator::generateConfigurations(unsigned Opcode) const { + if (auto E = generateCodeTemplate(Opcode)) { + CodeTemplate &CT = E.get(); + std::vector Output; + // TODO: Generate as many BenchmarkCode as needed. + { + BenchmarkCode BC; + BC.Info = CT.Info; + for (InstructionBuilder &IB : CT.Instructions) { + IB.randomizeUnsetVariables( + CT.ScratchSpacePointerInReg + ? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits() + : RATC.emptyRegisters()); + BC.Instructions.push_back(IB.build()); + } + if (CT.ScratchSpacePointerInReg) + BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); + BC.RegsToDef = computeRegsToDef(CT.Instructions); + Output.push_back(std::move(BC)); + } + return Output; + } else + return E.takeError(); +} + +std::vector SnippetGenerator::computeRegsToDef( + const std::vector &Instructions) const { + // Collect all register uses and create an assignment for each of them. + // Ignore memory operands which are handled separately. + // Loop invariant: DefinedRegs[i] is true iif it has been set at least once + // before the current instruction. 
+ llvm::BitVector DefinedRegs = RATC.emptyRegisters(); + std::vector RegsToDef; + for (const InstructionBuilder &IB : Instructions) { + // Returns the register that this Operand sets or uses, or 0 if this is not + // a register. + const auto GetOpReg = [&IB](const Operand &Op) -> unsigned { + if (Op.IsMem) + return 0; + if (Op.ImplicitReg) + return *Op.ImplicitReg; + if (Op.IsExplicit && IB.getValueFor(Op).isReg()) + return IB.getValueFor(Op).getReg(); + return 0; + }; + // Collect used registers that have never been def'ed. + for (const Operand &Op : IB.Instr.Operands) { + if (!Op.IsDef) { + const unsigned Reg = GetOpReg(Op); + if (Reg > 0 && !DefinedRegs.test(Reg)) { + RegsToDef.push_back(Reg); + DefinedRegs.set(Reg); + } + } + } + // Mark defs as having been def'ed. + for (const Operand &Op : IB.Instr.Operands) { + if (Op.IsDef) { + const unsigned Reg = GetOpReg(Op); + if (Reg > 0) + DefinedRegs.set(Reg); + } + } + } + return RegsToDef; +} + +llvm::Expected SnippetGenerator::generateSelfAliasingCodeTemplate( + const Instruction &Instr) const { + const AliasingConfigurations SelfAliasing(Instr, Instr); + if (SelfAliasing.empty()) { + return llvm::make_error("empty self aliasing"); + } + CodeTemplate CT; + InstructionBuilder IB(Instr); + if (SelfAliasing.hasImplicitAliasing()) { + CT.Info = "implicit Self cycles, picking random values."; + } else { + CT.Info = "explicit self cycles, selecting one aliasing Conf."; + // This is a self aliasing instruction so defs and uses are from the same + // instance, hence twice IB in the following call. 
+ setRandomAliasing(SelfAliasing, IB, IB); + } + CT.Instructions.push_back(std::move(IB)); + return std::move(CT); +} + +llvm::Expected +SnippetGenerator::generateUnconstrainedCodeTemplate(const Instruction &Instr, + llvm::StringRef Msg) const { + CodeTemplate CT; + CT.Info = llvm::formatv("{0}, repeating an unconstrained assignment", Msg); + CT.Instructions.emplace_back(Instr); + return std::move(CT); +} +} // namespace exegesis Index: tools/llvm-exegesis/lib/Target.h =================================================================== --- tools/llvm-exegesis/lib/Target.h +++ tools/llvm-exegesis/lib/Target.h @@ -20,6 +20,7 @@ #include "BenchmarkResult.h" #include "BenchmarkRunner.h" #include "LlvmState.h" +#include "SnippetGenerator.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/CallingConv.h" @@ -61,6 +62,10 @@ // matter as long as it's large enough. virtual unsigned getMaxMemoryAccessSize() const { return 0; } + // Creates a snippet generator for the given mode. + std::unique_ptr + createSnippetGenerator(InstructionBenchmark::ModeE Mode, + const LLVMState &State) const; // Creates a benchmark runner for the given mode. std::unique_ptr createBenchmarkRunner(InstructionBenchmark::ModeE Mode, @@ -79,8 +84,12 @@ private: virtual bool matchesArch(llvm::Triple::ArchType Arch) const = 0; - // Targets can implement their own Latency/Uops benchmarks runners by + // Targets can implement their own snippet generators/benchmarks runners by // implementing these. 
+ std::unique_ptr virtual createLatencySnippetGenerator( + const LLVMState &State) const; + std::unique_ptr virtual createUopsSnippetGenerator( + const LLVMState &State) const; std::unique_ptr virtual createLatencyBenchmarkRunner( const LLVMState &State) const; std::unique_ptr virtual createUopsBenchmarkRunner( Index: tools/llvm-exegesis/lib/Target.cpp =================================================================== --- tools/llvm-exegesis/lib/Target.cpp +++ tools/llvm-exegesis/lib/Target.cpp @@ -36,6 +36,20 @@ FirstTarget = Target; } +std::unique_ptr +ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode, + const LLVMState &State) const { + switch (Mode) { + case InstructionBenchmark::Unknown: + return nullptr; + case InstructionBenchmark::Latency: + return createLatencySnippetGenerator(State); + case InstructionBenchmark::Uops: + return createUopsSnippetGenerator(State); + } + return nullptr; +} + std::unique_ptr ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode, const LLVMState &State) const { @@ -50,6 +64,16 @@ return nullptr; } +std::unique_ptr +ExegesisTarget::createLatencySnippetGenerator(const LLVMState &State) const { + return llvm::make_unique(State); +} + +std::unique_ptr +ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const { + return llvm::make_unique(State); +} + std::unique_ptr ExegesisTarget::createLatencyBenchmarkRunner(const LLVMState &State) const { return llvm::make_unique(State); Index: tools/llvm-exegesis/lib/Uops.h =================================================================== --- tools/llvm-exegesis/lib/Uops.h +++ tools/llvm-exegesis/lib/Uops.h @@ -16,14 +16,14 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H #include "BenchmarkRunner.h" +#include "SnippetGenerator.h" namespace exegesis { -class UopsBenchmarkRunner : public BenchmarkRunner { +class UopsSnippetGenerator : public SnippetGenerator { public: - UopsBenchmarkRunner(const LLVMState &State) - : BenchmarkRunner(State, 
InstructionBenchmark::Uops) {} - ~UopsBenchmarkRunner() override; + UopsSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + ~UopsSnippetGenerator() override; llvm::Expected generateCodeTemplate(unsigned Opcode) const override; @@ -33,10 +33,6 @@ private: llvm::Error isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const; - std::vector - runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, - const unsigned NumRepetitions) const override; - // Instantiates memory operands within a snippet. // To make computations as parallel as possible, we generate independant // memory locations for instructions that load and store. If there are less @@ -65,6 +61,20 @@ std::vector &Snippet) const; }; +class UopsBenchmarkRunner : public BenchmarkRunner { +public: + UopsBenchmarkRunner(const LLVMState &State) + : BenchmarkRunner(State, InstructionBenchmark::Uops) {} + ~UopsBenchmarkRunner() override; + + static constexpr const size_t kMinNumDifferentAddresses = 6; + +private: + std::vector + runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, + const unsigned NumRepetitions) const override; +}; + } // namespace exegesis #endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -86,7 +86,7 @@ } llvm::Error -UopsBenchmarkRunner::isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const { +UopsSnippetGenerator::isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const { if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand)) return llvm::make_error( "Infeasible : has unknown operands"); @@ -123,8 +123,9 @@ } UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; +UopsSnippetGenerator::~UopsSnippetGenerator() = default; -void UopsBenchmarkRunner::instantiateMemoryOperands( +void UopsSnippetGenerator::instantiateMemoryOperands( const unsigned 
ScratchSpacePointerInReg, std::vector &Instructions) const { if (ScratchSpacePointerInReg == 0) @@ -144,11 +145,12 @@ ++I; Instructions.push_back(std::move(IB)); } - assert(I * MemStep < ScratchSpace::kSize && "not enough scratch space"); + assert(I * MemStep < BenchmarkRunner::ScratchSpace::kSize && + "not enough scratch space"); } llvm::Expected -UopsBenchmarkRunner::generateCodeTemplate(unsigned Opcode) const { +UopsSnippetGenerator::generateCodeTemplate(unsigned Opcode) const { const auto &InstrDesc = State.getInstrInfo().get(Opcode); if (auto E = isInfeasible(InstrDesc)) return std::move(E); @@ -285,6 +287,6 @@ return Result; } -constexpr const size_t UopsBenchmarkRunner::kMinNumDifferentAddresses; +constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses; } // namespace exegesis Index: tools/llvm-exegesis/lib/X86/Target.cpp =================================================================== --- tools/llvm-exegesis/lib/X86/Target.cpp +++ tools/llvm-exegesis/lib/X86/Target.cpp @@ -22,7 +22,7 @@ namespace { // Common code for X86 Uops and Latency runners. 
-template class X86BenchmarkRunner : public Impl { +template class X86SnippetGenerator : public Impl { using Impl::Impl; llvm::Expected @@ -71,21 +71,23 @@ } }; -class X86LatencyImpl : public LatencyBenchmarkRunner { +class X86LatencyImpl : public LatencySnippetGenerator { protected: - using Base = LatencyBenchmarkRunner; + using Base = LatencySnippetGenerator; using Base::Base; llvm::Expected handleCompareFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 CompareFP"); + return llvm::make_error( + "Unsupported x87 CompareFP"); } llvm::Expected handleCondMovFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 CondMovFP"); + return llvm::make_error( + "Unsupported x87 CondMovFP"); } }; -class X86UopsImpl : public UopsBenchmarkRunner { +class X86UopsImpl : public UopsSnippetGenerator { protected: - using Base = UopsBenchmarkRunner; + using Base = UopsSnippetGenerator; using Base::Base; // We can compute uops for any FP instruction that does not grow or shrink the // stack (either do not touch the stack or push as much as they pop). @@ -193,14 +195,14 @@ return {}; } - std::unique_ptr - createLatencyBenchmarkRunner(const LLVMState &State) const override { - return llvm::make_unique>(State); + std::unique_ptr + createLatencySnippetGenerator(const LLVMState &State) const override { + return llvm::make_unique>(State); } - std::unique_ptr - createUopsBenchmarkRunner(const LLVMState &State) const override { - return llvm::make_unique>(State); + std::unique_ptr + createUopsSnippetGenerator(const LLVMState &State) const override { + return llvm::make_unique>(State); } bool matchesArch(llvm::Triple::ArchType Arch) const override { Index: tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- tools/llvm-exegesis/llvm-exegesis.cpp +++ tools/llvm-exegesis/llvm-exegesis.cpp @@ -119,6 +119,30 @@ return Ctx; } +// Generates code snippets for opcode `Opcode`. 
+llvm::Expected> +generateSnippets(const LLVMState &State, unsigned Opcode, + unsigned NumRepetitions) { + const std::unique_ptr Generator = + State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State); + if (!Generator) { + llvm::report_fatal_error("cannot create snippet generator"); + } + + const llvm::MCInstrDesc &InstrDesc = State.getInstrInfo().get(Opcode); + // Ignore instructions that we cannot run. + if (InstrDesc.isPseudo()) + return llvm::make_error("Unsupported opcode: isPseudo"); + if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) + return llvm::make_error( + "Unsupported opcode: isBranch/isIndirectBranch"); + if (InstrDesc.isCall() || InstrDesc.isReturn()) + return llvm::make_error( + "Unsupported opcode: isCall/isReturn"); + + return Generator->generateConfigurations(Opcode); +} + void benchmarkMain() { if (exegesis::pfm::pfmInitialize()) llvm::report_fatal_error("cannot initialize libpfm"); @@ -140,6 +164,10 @@ return; } + // FIXME: Allow arbitrary code. 
+ const std::vector Configurations = + ExitOnErr(generateSnippets(State, Opcode, NumRepetitions)); + const std::unique_ptr Runner = State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State); if (!Runner) { @@ -154,11 +182,12 @@ BenchmarkFile = "-"; const BenchmarkResultContext Context = getBenchmarkResultContext(State); - std::vector Results = - ExitOnErr(Runner->run(Opcode, NumRepetitions)); - for (InstructionBenchmark &Result : Results) - ExitOnErr(Result.writeYaml(Context, BenchmarkFile)); + for (const BenchmarkCode &Conf : Configurations) { + InstructionBenchmark Result = + Runner->runConfiguration(Conf, NumRepetitions); + ExitOnErr(Result.writeYaml(Context, BenchmarkFile)); + } exegesis::pfm::pfmTerminate(); } Index: unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -52,26 +52,27 @@ const llvm::MCRegisterInfo &MCRegisterInfo; }; -template +template class SnippetGeneratorTest : public X86SnippetGeneratorTest { protected: - SnippetGeneratorTest() : Runner(State) {} + SnippetGeneratorTest() : Generator(State) {} CodeTemplate checkAndGetCodeTemplate(unsigned Opcode) { randomGenerator().seed(0); // Initialize seed. - auto CodeTemplateOrError = Runner.generateCodeTemplate(Opcode); + auto CodeTemplateOrError = Generator.generateCodeTemplate(Opcode); EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration. 
return std::move(CodeTemplateOrError.get()); } - BenchmarkRunner Runner; + SnippetGeneratorT Generator; }; -using LatencyBenchmarkRunnerTest = SnippetGeneratorTest; +using LatencySnippetGeneratorTest = + SnippetGeneratorTest; -using UopsBenchmarkRunnerTest = SnippetGeneratorTest; +using UopsSnippetGeneratorTest = SnippetGeneratorTest; -TEST_F(LatencyBenchmarkRunnerTest, ImplicitSelfDependency) { +TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) { // ADC16i16 self alias because of implicit use and def. // explicit use 0 : imm @@ -93,7 +94,7 @@ EXPECT_THAT(IB.VariableValues[0], IsInvalid()) << "Immediate is not set"; } -TEST_F(LatencyBenchmarkRunnerTest, ExplicitSelfDependency) { +TEST_F(LatencySnippetGeneratorTest, ExplicitSelfDependency) { // ADD16ri self alias because Op0 and Op1 are tied together. // explicit def 0 : reg RegClass=GR16 @@ -112,7 +113,7 @@ EXPECT_THAT(IB.VariableValues[1], IsInvalid()) << "Operand 2 is not set"; } -TEST_F(LatencyBenchmarkRunnerTest, DependencyThroughOtherOpcode) { +TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { // CMP64rr // explicit use 0 : reg RegClass=GR64 // explicit use 1 : reg RegClass=GR64 @@ -131,7 +132,7 @@ // TODO: check that the two instructions alias each other. } -TEST_F(LatencyBenchmarkRunnerTest, LAHF) { +TEST_F(LatencySnippetGeneratorTest, LAHF) { const unsigned Opcode = llvm::X86::LAHF; const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); EXPECT_THAT(CT.Info, HasSubstr("cycle through")); @@ -141,7 +142,7 @@ ASSERT_THAT(IB.VariableValues, SizeIs(0)); } -TEST_F(UopsBenchmarkRunnerTest, ParallelInstruction) { +TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { // BNDCL32rr is parallel no matter what. // explicit use 0 : reg RegClass=BNDR @@ -158,7 +159,7 @@ EXPECT_THAT(IB.VariableValues[1], IsInvalid()); } -TEST_F(UopsBenchmarkRunnerTest, SerialInstruction) { +TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { // CDQ is serial no matter what. 
// implicit def : EAX @@ -173,7 +174,7 @@ ASSERT_THAT(IB.VariableValues, SizeIs(0)); } -TEST_F(UopsBenchmarkRunnerTest, StaticRenaming) { +TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { // CMOVA32rr has tied variables, we enumarate the possible values to execute // as many in parallel as possible. @@ -195,7 +196,7 @@ << "Each instruction writes to a different register"; } -TEST_F(UopsBenchmarkRunnerTest, NoTiedVariables) { +TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) { // CMOV_GR32 has no tied variables, we make sure def and use are different // from each other. @@ -218,13 +219,13 @@ EXPECT_THAT(IB.VariableValues[3], IsInvalid()); } -TEST_F(UopsBenchmarkRunnerTest, MemoryUse) { +TEST_F(UopsSnippetGeneratorTest, MemoryUse) { // Mov32rm reads from memory. const unsigned Opcode = llvm::X86::MOV32rm; const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); ASSERT_THAT(CT.Instructions, - SizeIs(UopsBenchmarkRunner::kMinNumDifferentAddresses)); + SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); const InstructionBuilder &IB = CT.Instructions[0]; EXPECT_THAT(IB.getOpcode(), Opcode); ASSERT_THAT(IB.VariableValues, SizeIs(6)); @@ -234,18 +235,17 @@ EXPECT_EQ(IB.VariableValues[5].getReg(), 0u); } -TEST_F(UopsBenchmarkRunnerTest, MemoryUse_Movsb) { +TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) { // MOVSB writes to scratch memory register. 
const unsigned Opcode = llvm::X86::MOVSB; - auto Error = Runner.generateCodeTemplate(Opcode).takeError(); + auto Error = Generator.generateCodeTemplate(Opcode).takeError(); EXPECT_TRUE((bool)Error); llvm::consumeError(std::move(Error)); } -class FakeBenchmarkRunner : public BenchmarkRunner { +class FakeSnippetGenerator : public SnippetGenerator { public: - FakeBenchmarkRunner(const LLVMState &State) - : BenchmarkRunner(State, InstructionBenchmark::Unknown) {} + FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} Instruction createInstruction(unsigned Opcode) { return Instruction(State.getInstrInfo().get(Opcode), RATC); @@ -257,15 +257,9 @@ return llvm::make_error("not implemented", llvm::inconvertibleErrorCode()); } - - std::vector - runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, - const unsigned NumRepetitions) const override { - return {}; - } }; -using FakeSnippetGeneratorTest = SnippetGeneratorTest; +using FakeSnippetGeneratorTest = SnippetGeneratorTest; TEST_F(FakeSnippetGeneratorTest, ComputeRegsToDefAdd16ri) { // ADD16ri: @@ -273,12 +267,12 @@ // explicit use 1 : reg RegClass=GR16 | TIED_TO:0 // explicit use 2 : imm // implicit def : EFLAGS - InstructionBuilder IB(Runner.createInstruction(llvm::X86::ADD16ri)); + InstructionBuilder IB(Generator.createInstruction(llvm::X86::ADD16ri)); IB.getValueFor(IB.Instr.Variables[0]) = llvm::MCOperand::createReg(llvm::X86::AX); std::vector Snippet; Snippet.push_back(std::move(IB)); - const auto RegsToDef = Runner.computeRegsToDef(Snippet); + const auto RegsToDef = Generator.computeRegsToDef(Snippet); EXPECT_THAT(RegsToDef, UnorderedElementsAre(llvm::X86::AX)); } @@ -289,14 +283,14 @@ // -> only rbx needs defining. 
std::vector Snippet; { - InstructionBuilder Mov(Runner.createInstruction(llvm::X86::MOV64ri)); + InstructionBuilder Mov(Generator.createInstruction(llvm::X86::MOV64ri)); Mov.getValueFor(Mov.Instr.Variables[0]) = llvm::MCOperand::createReg(llvm::X86::RAX); Mov.getValueFor(Mov.Instr.Variables[1]) = llvm::MCOperand::createImm(42); Snippet.push_back(std::move(Mov)); } { - InstructionBuilder Add(Runner.createInstruction(llvm::X86::ADD64rr)); + InstructionBuilder Add(Generator.createInstruction(llvm::X86::ADD64rr)); Add.getValueFor(Add.Instr.Variables[0]) = llvm::MCOperand::createReg(llvm::X86::RAX); Add.getValueFor(Add.Instr.Variables[1]) = @@ -304,7 +298,7 @@ Snippet.push_back(std::move(Add)); } - const auto RegsToDef = Runner.computeRegsToDef(Snippet); + const auto RegsToDef = Generator.computeRegsToDef(Snippet); EXPECT_THAT(RegsToDef, UnorderedElementsAre(llvm::X86::RBX)); }