Index: tools/LLVMBuild.txt =================================================================== --- tools/LLVMBuild.txt +++ tools/LLVMBuild.txt @@ -32,6 +32,7 @@ llvm-dis llvm-dwarfdump llvm-dwp + llvm-exegesis llvm-extract llvm-jitlistener llvm-link Index: tools/llvm-exegesis/BenchmarkResult.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/BenchmarkResult.h @@ -0,0 +1,54 @@ +//===-- BenchmarkResult.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines classes to represent measurements and serialize/deserialize them to +// Yaml. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H_ + +#include +#include + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/YAMLTraits.h" + +namespace exegesis { + +struct AsmTemplate { + std::string Name; +}; + +struct BenchmarkMeasure { + std::string Key; + double Value; + std::string DebugString; +}; + +// The result of an instruction benchmark. +struct InstructionBenchmark { + AsmTemplate AsmTmpl; + std::string CpuName; + std::string LLVMTriple; + size_t NumRepetitions = 0; + std::vector Measurements; + std::string Error; + + static InstructionBenchmark readYamlOrDie(llvm::StringRef Filename); + + // Unfortunately this function is non const because of YAML traits. + void writeYamlOrDie(const llvm::StringRef Filename); +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H_ Index: tools/llvm-exegesis/BenchmarkResult.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/BenchmarkResult.cpp @@ -0,0 +1,90 @@ +//===-- BenchmarkResult.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkResult.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +// Defining YAML traits for IO. +namespace llvm { +namespace yaml { + +// std::vector will be rendered as a list. +template <> +struct SequenceElementTraits { + static const bool flow = false; +}; + +// exegesis::Measure is rendererd as a flow instead of a list. +// e.g. { "key": "the key", "value": 0123 } +template <> +struct MappingTraits { + static void mapping(IO& Io, exegesis::BenchmarkMeasure& Obj) { + Io.mapRequired("key", Obj.Key); + Io.mapRequired("value", Obj.Value); + Io.mapOptional("debug_string", Obj.DebugString); + } + static const bool flow = true; +}; + +template <> +struct MappingTraits { + static void mapping(IO& Io, exegesis::AsmTemplate& Obj) { + Io.mapRequired("name", Obj.Name); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO& Io, exegesis::InstructionBenchmark& Obj) { + Io.mapRequired("asm_template", Obj.AsmTmpl); + Io.mapRequired("cpu_name", Obj.CpuName); + Io.mapRequired("llvm_triple", Obj.LLVMTriple); + Io.mapRequired("num_repetitions", Obj.NumRepetitions); + Io.mapRequired("measurements", Obj.Measurements); + Io.mapRequired("error", Obj.Error); + } +}; + +} // namespace yaml +} // namespace llvm + +namespace exegesis { + +InstructionBenchmark InstructionBenchmark::readYamlOrDie( + llvm::StringRef Filename) { + std::unique_ptr MemBuffer = llvm::cantFail( + llvm::errorOrToExpected(llvm::MemoryBuffer::getFile(Filename))); + llvm::yaml::Input Yin(*MemBuffer); + InstructionBenchmark Benchmark; + Yin >> Benchmark; + return Benchmark; +} + +void InstructionBenchmark::writeYamlOrDie(const llvm::StringRef Filename) { + if (Filename == "-") { + llvm::yaml::Output Yout(llvm::outs()); + Yout << *this; + } else { + llvm::SmallString<1024> Buffer; + llvm::raw_svector_ostream Ostr(Buffer); + llvm::yaml::Output Yout(Ostr); + Yout << *this; + std::unique_ptr File = + llvm::cantFail(llvm::FileOutputBuffer::create(Filename, Buffer.size())); + memcpy(File->getBufferStart(), Buffer.data(), Buffer.size()); + llvm::cantFail(File->commit()); + } +} + +} // namespace exegesis Index: tools/llvm-exegesis/BenchmarkRunner.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/BenchmarkRunner.h @@ -0,0 +1,64 @@ +//===-- BenchmarkRunner.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the abstract BenchmarkRunner class for measuring a certain execution +/// property of instructions (e.g. latency). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H_ + +#include + +#include "BenchmarkResult.h" +#include "InMemoryAssembler.h" +#include "LlvmState.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Error.h" + +namespace exegesis { + +// Common code for all benchmark modes. +class BenchmarkRunner { + public: + // Subtargets can disable running benchmarks for some instructions by + // returning an error here. + class InstructionFilter { + public: + virtual ~InstructionFilter(); + + virtual llvm::Error shouldRun(const LLVMState& State, + unsigned Opcode) const { + return llvm::ErrorSuccess(); + } + }; + + virtual ~BenchmarkRunner(); + + InstructionBenchmark run(const LLVMState& State, unsigned Opcode, + unsigned NumRepetitions, + const InstructionFilter& Filter) const; + + private: + virtual const char* getDisplayName() const = 0; + + virtual llvm::Expected> createCode( + const LLVMState& State, unsigned OpcodeIndex, unsigned NumRepetitions, + const JitFunctionContext& Context) const = 0; + + virtual std::vector runMeasurements( + const LLVMState& State, const JitFunction& Function, + unsigned NumRepetitions) const = 0; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H_ Index: tools/llvm-exegesis/BenchmarkRunner.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/BenchmarkRunner.cpp @@ -0,0 +1,80 @@ +//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkRunner.h" + +#include + +#include "InMemoryAssembler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" + +namespace exegesis { + +BenchmarkRunner::InstructionFilter::~InstructionFilter() {} + +BenchmarkRunner::~BenchmarkRunner() {} + +InstructionBenchmark BenchmarkRunner::run( + const LLVMState& State, const unsigned Opcode, unsigned NumRepetitions, + const InstructionFilter& Filter) const { + InstructionBenchmark InstrBenchmark; + + InstrBenchmark.AsmTmpl.Name = + llvm::Twine(getDisplayName()) + .concat(" ") + .concat(State.getInstrInfo().getName(Opcode)) + .str(); + InstrBenchmark.CpuName = State.getCpuName(); + InstrBenchmark.LLVMTriple = State.getTriple(); + InstrBenchmark.NumRepetitions = NumRepetitions; + + // Ignore instructions that we cannot run. + if (State.getInstrInfo().get(Opcode).isPseudo()) { + InstrBenchmark.Error = "Unsupported opcode: isPseudo"; + return InstrBenchmark; + } + if (llvm::Error E = Filter.shouldRun(State, Opcode)) { + InstrBenchmark.Error = llvm::toString(std::move(E)); + return InstrBenchmark; + } + + JitFunctionContext Context(State.createTargetMachine()); + auto ExpectedInstructions = + createCode(State, Opcode, NumRepetitions, Context); + if (llvm::Error E = ExpectedInstructions.takeError()) { + InstrBenchmark.Error = llvm::toString(std::move(E)); + return InstrBenchmark; + } + + const std::vector Instructions = *ExpectedInstructions; + const JitFunction Function(std::move(Context), Instructions); + const llvm::StringRef CodeBytes = Function.getFunctionBytes(); + + std::string AsmExcerpt; + constexpr const int kExcerptSize = 100; + constexpr const int kExcerptTailSize = 10; + if (CodeBytes.size() <= kExcerptSize) { + AsmExcerpt = llvm::toHex(CodeBytes); + } else { + AsmExcerpt = + llvm::toHex(CodeBytes.take_front(kExcerptSize - kExcerptTailSize + 3)); + AsmExcerpt += "..."; + AsmExcerpt += llvm::toHex(CodeBytes.take_back(kExcerptTailSize)); + } + llvm::outs() << "# Asm excerpt: " << AsmExcerpt << "\n"; + llvm::outs().flush(); // In case we crash. + + InstrBenchmark.Measurements = + runMeasurements(State, Function, NumRepetitions); + return InstrBenchmark; +} + +} // namespace exegesis Index: tools/llvm-exegesis/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-exegesis/CMakeLists.txt @@ -0,0 +1,37 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmPrinters + AllTargetsDescs + AllTargetsInfos + X86 + CodeGen + ExecutionEngine + MC + MCJIT + Support + ) + +add_llvm_tool(llvm-exegesis + BenchmarkResult.cpp + BenchmarkRunner.cpp + InMemoryAssembler.cpp + InstructionSnippetGenerator.cpp + Latency.cpp + llvm-exegesis.cpp + LlvmState.cpp + OperandGraph.cpp + PerfHelper.cpp + Uops.cpp + X86.cpp + ) + +if(LLVM_TOOL_LLVM_EXEGESIS_BUILD) + check_library_exists(pfm pfm_initialize "" HAVE_LIBPFM) + if(NOT HAVE_LIBPFM) + message(FATAL_ERROR "llvm-exegesis requires libpfm") + else() + target_link_libraries(llvm-exegesis PRIVATE pfm) + endif() +endif() + + + Index: tools/llvm-exegesis/InMemoryAssembler.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/InMemoryAssembler.h @@ -0,0 +1,79 @@ +//===-- InMemoryAssembler.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines classes to assemble functions composed of a single basic block of +/// MCInsts. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_INMEMORYASSEMBLER_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_INMEMORYASSEMBLER_H_ + +#include +#include + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCInst.h" + +namespace exegesis { + +// Consumable context for JitFunction below. +// This temporary object allows for retrieving MachineFunction properties before +// assembling it. +class JitFunctionContext { + public: + explicit JitFunctionContext(std::unique_ptr TM); + // Movable + JitFunctionContext(JitFunctionContext&&) = default; + JitFunctionContext& operator=(JitFunctionContext&&) = default; + // Non copyable + JitFunctionContext(const JitFunctionContext&) = delete; + JitFunctionContext& operator=(const JitFunctionContext&) = delete; + + const llvm::BitVector& getReservedRegs() const { return ReservedRegs; } + + private: + friend class JitFunction; + + std::unique_ptr Context; + std::unique_ptr TM; + std::unique_ptr MMI; + std::unique_ptr Module; + llvm::MachineFunction* MF = nullptr; + llvm::BitVector ReservedRegs; +}; + +// Creates a void() function from a sequence of llvm::MCInst. +class JitFunction { + public: + // Assembles Instructions into an executable function. + JitFunction(JitFunctionContext&& Context, + llvm::ArrayRef Instructions); + + // Retrieves the function as an array of bytes. + llvm::StringRef getFunctionBytes() const { return FunctionBytes; } + + // Retrieves the callable function. + void operator()() const { ((void (*)())FunctionBytes.data())(); } + + private: + JitFunctionContext FunctionContext; + std::unique_ptr ExecEngine; + llvm::StringRef FunctionBytes; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_INMEMORYASSEMBLER_H_ Index: tools/llvm-exegesis/InMemoryAssembler.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/InMemoryAssembler.cpp @@ -0,0 +1,233 @@ +//===-- InMemoryAssembler.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "InMemoryAssembler.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/MCJIT.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/PassInfo.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +namespace exegesis { + +namespace { +static constexpr const char kModuleID[] = "ExegesisInfoTest"; +static constexpr const char kFunctionID[] = "foo"; + +// Small utility function to add named passes. +bool addPass(llvm::PassManagerBase& PM, llvm::StringRef PassName, + llvm::TargetPassConfig& TPC) { + const llvm::PassRegistry* PR = llvm::PassRegistry::getPassRegistry(); + const llvm::PassInfo* PI = PR->getPassInfo(PassName); + if (!PI) { + llvm::errs() << " run-pass " << PassName << " is not registered.\n"; + return true; + } + + llvm::Pass* P; + if (PI->getNormalCtor()) { + P = PI->getNormalCtor()(); + } else { + llvm::errs() << " cannot create pass: " << PI->getPassName() << "\n"; + return true; + } + std::string Banner = std::string("After ") + std::string(P->getPassName()); + PM.add(P); + TPC.printAndVerify(Banner); + + return false; +} + +// Creates a void MachineFunction with no argument. +llvm::MachineFunction& createVoidVoidMachineFunction( + llvm::StringRef FunctionID, llvm::Module* Module, + llvm::MachineModuleInfo* MMI) { + llvm::Type* const ReturnType = llvm::Type::getInt32Ty(Module->getContext()); + llvm::FunctionType* FunctionType = llvm::FunctionType::get(ReturnType, false); + llvm::Function* const F = llvm::Function::Create( + FunctionType, llvm::GlobalValue::InternalLinkage, FunctionID, Module); + // Making sure we can create a MachineFunction out of this Function even if it + // contains no IR. + F->setIsMaterializable(true); + return MMI->getOrCreateMachineFunction(*F); +} + +llvm::object::OwningBinary assemble( + llvm::Module* Module, std::unique_ptr MMI, + llvm::LLVMTargetMachine* LLVMTM) { + llvm::legacy::PassManager PM; + llvm::MCContext& Context = MMI->getContext(); + + llvm::TargetLibraryInfoImpl TLII(llvm::Triple(Module->getTargetTriple())); + PM.add(new llvm::TargetLibraryInfoWrapperPass(TLII)); + + llvm::TargetPassConfig* TPC = LLVMTM->createPassConfig(PM); + PM.add(TPC); + PM.add(MMI.release()); + TPC->printAndVerify("MachineFunctionGenerator::assemble"); + // Adding the following passes: + // - machineverifier: checks that the MachineFunction is well formed. + // - prologepilog: saves and restore callee saved registers. + for (const char* PassName : {"machineverifier", "prologepilog"}) { + if (addPass(PM, PassName, *TPC)) { + llvm::report_fatal_error("Unable to add a mandatory pass"); + } + } + TPC->setInitialized(); + + llvm::SmallVector AsmBuffer; + llvm::raw_svector_ostream AsmStream(AsmBuffer); + // AsmPrinter is responsible for generating the assembly into AsmBuffer. + if (LLVMTM->addAsmPrinter(PM, AsmStream, llvm::TargetMachine::CGFT_ObjectFile, + Context)) { + llvm::report_fatal_error("Cannot add AsmPrinter passes"); + } + + PM.run(*Module); // Run all the passes + + // Storing the generated assembly into a MemoryBuffer that owns the memory. + std::unique_ptr Buffer = + llvm::MemoryBuffer::getMemBufferCopy(AsmStream.str()); + // Create the ObjectFile from the MemoryBuffer. + std::unique_ptr Obj = llvm::cantFail( + llvm::object::ObjectFile::createObjectFile(Buffer->getMemBufferRef())); + // Returning both the MemoryBuffer and the ObjectFile. + return llvm::object::OwningBinary( + std::move(Obj), std::move(Buffer)); +} + +void fillMachineFunction(llvm::MachineFunction& MF, + llvm::ArrayRef Instructions) { + llvm::MachineBasicBlock* MBB = MF.CreateMachineBasicBlock(); + MF.push_back(MBB); + const llvm::MCInstrInfo* MCII = MF.getTarget().getMCInstrInfo(); + const llvm::DebugLoc DL; + for (const llvm::MCInst& Inst : Instructions) { + const unsigned Opcode = Inst.getOpcode(); + const llvm::MCInstrDesc& MCID = MCII->get(Opcode); + llvm::MachineInstrBuilder Builder = llvm::BuildMI(MBB, DL, MCID); + for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { + const llvm::MCOperand& Op = Inst.getOperand(OpIndex); + if (Op.isReg()) { + const bool isDef = OpIndex < MCID.getNumDefs(); + unsigned Flags = 0; + const llvm::MCOperandInfo& OpInfo = MCID.operands().begin()[OpIndex]; + if (isDef && !OpInfo.isOptionalDef()) { + Flags |= llvm::RegState::Define; + } + Builder.addReg(Op.getReg(), Flags); + } else if (Op.isImm()) { + Builder.addImm(Op.getImm()); + } else { + llvm_unreachable("Not yet implemented"); + } + } + } + // Adding the Return Opcode. + const llvm::TargetInstrInfo* TII = MF.getSubtarget().getInstrInfo(); + llvm::BuildMI(MBB, DL, TII->get(TII->getReturnOpcode())); +} + +// Implementation of this class relies on the fact that a single object with a +// single function will be loaded into memory. +class TrackingSectionMemoryManager : public llvm::SectionMemoryManager { + public: + explicit TrackingSectionMemoryManager(uintptr_t* CodeSize) + : CodeSize(CodeSize) {} + + uint8_t* allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + llvm::StringRef SectionName) override { + *CodeSize = Size; + return llvm::SectionMemoryManager::allocateCodeSection( + Size, Alignment, SectionID, SectionName); + } + + private: + uintptr_t* const CodeSize = nullptr; +}; + +} // namespace + +JitFunctionContext::JitFunctionContext( + std::unique_ptr TheTM) + : Context(llvm::make_unique()), + TM(std::move(TheTM)), + MMI(llvm::make_unique(TM.get())), + Module(llvm::make_unique(kModuleID, *Context)) { + Module->setDataLayout(TM->createDataLayout()); + MF = &createVoidVoidMachineFunction(kFunctionID, Module.get(), MMI.get()); + // We need to instruct the passes that we're done with SSA and virtual + // registers. + auto& Properties = MF->getProperties(); + Properties.set(llvm::MachineFunctionProperties::Property::NoVRegs); + Properties.reset(llvm::MachineFunctionProperties::Property::IsSSA); + Properties.reset(llvm::MachineFunctionProperties::Property::TracksLiveness); + // prologue/epilogue pass needs the reserved registers to be frozen, this is + // usually done by the SelectionDAGISel pass. + MF->getRegInfo().freezeReservedRegs(*MF); + // Saving reserved registers for client. + ReservedRegs = MF->getSubtarget().getRegisterInfo()->getReservedRegs(*MF); +} + +JitFunction::JitFunction(JitFunctionContext&& Context, + llvm::ArrayRef Instructions) + : FunctionContext(std::move(Context)) { + // + fillMachineFunction(*FunctionContext.MF, Instructions); + // We create the pass manager, run the passes and returns the produced + // ObjectFile. + llvm::object::OwningBinary ObjHolder = + assemble(FunctionContext.Module.get(), std::move(FunctionContext.MMI), + FunctionContext.TM.get()); + assert(ObjHolder.getBinary()); + // Initializing the execution engine. + // We need to use the JIT EngineKind to be able to add an object file. + LLVMLinkInMCJIT(); + uintptr_t CodeSize = 0; + std::string Error; + ExecEngine.reset( + llvm::EngineBuilder(std::move(FunctionContext.Module)) + .setErrorStr(&Error) + .setMCPU(FunctionContext.TM->getTargetCPU()) + .setEngineKind(llvm::EngineKind::JIT) + .setMCJITMemoryManager( + llvm::make_unique(&CodeSize)) + .create(FunctionContext.TM.release())); + if (!ExecEngine) { + llvm::report_fatal_error(Error); + } + // Adding the generated object file containing the assembled function. + // The ExecutionEngine makes sure the object file is copied into an + // executable page. + ExecEngine->addObjectFile(ObjHolder.takeBinary().first); + // Setting function + FunctionBytes = + llvm::StringRef(reinterpret_cast( + ExecEngine->getFunctionAddress(kFunctionID)), + CodeSize); +} + +} // namespace exegesis Index: tools/llvm-exegesis/InstructionSnippetGenerator.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/InstructionSnippetGenerator.h @@ -0,0 +1,118 @@ +//===-- InstructionSnippetGenerator.h ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines helper classes to generate code snippets, in particular register +/// assignment. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_INSTRUCTIONSNIPPETGENERATOR_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_INSTRUCTIONSNIPPETGENERATOR_H_ + +#include + +#include "OperandGraph.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" + +namespace exegesis { + +// A Variable represents a set of possible values that we need to choose from. +// It may represent one or more explicit operands that are tied together, or one +// implicit operand. +class Variable final { + public: + bool IsUse = false; + bool IsDef = false; + bool IsReg = false; + + // Lists all the explicit operand indices that are tied to this variable. + // Empty if Variable represents an implicit operand. + llvm::SmallVector ExplicitOperands; + + // - In case of explicit operands, PossibleRegisters is the expansion of the + // operands's RegClass registers. Please note that tied together explicit + // operands share the same RegClass. + // - In case of implicit operands, PossibleRegisters is a singleton MCPhysReg. + llvm::SmallSetVector PossibleRegisters; + + // If RegInfo is null, register names won't get resolved. + void print(llvm::raw_ostream& OS, const llvm::MCRegisterInfo* RegInfo) const; +}; + +// Builds a model of implicit and explicit operands for InstrDesc into +// Variables. +llvm::SmallVector getVariables( + const llvm::MCRegisterInfo& RegInfo, const llvm::MCInstrDesc& InstrDesc, + const llvm::BitVector& ReservedRegs); + +// A simple object to represent a Variable assignement. +struct VariableAssignment { + VariableAssignment(size_t VarIdx, llvm::MCPhysReg AssignedReg); + + size_t VarIdx; + llvm::MCPhysReg AssignedReg; + + bool operator==(const VariableAssignment&) const; + bool operator<(const VariableAssignment&) const; +}; + +// An AssignmentChain is a set of assignement realizing a dependency chain. +// We inherit from std::set to leverage uniqueness of elements. +using AssignmentChain = std::set; + +// Debug function to print an assignment chain. +void dumpAssignmentChain(const llvm::MCRegisterInfo& RegInfo, + const AssignmentChain& Chain); + +// Inserts Variables into a graph representing register aliasing and finds all +// the possible dependency chains for this instruction, i.e. all the possible +// assignement of operands that would make execution of the instruction +// sequential. +std::vector computeSequentialAssignmentChains( + const llvm::MCRegisterInfo& RegInfo, const llvm::ArrayRef Vars); + +// Selects a random configuration leading to a dependency chain. +// The result is a vector of the same size as `Vars`. +// `random_index_for_size` is a functor giving a random value in [0, arg[. +std::vector getRandomAssignment( + const llvm::ArrayRef Vars, + const llvm::ArrayRef Chains, + const std::function& RandomIndexForSize); + +// Finds an assignment of registers to variables such that no two variables are +// assigned the same register. +// The result is a vector of the same size as `Vars`, or `{}` if the +// assignment is not feasible. +std::vector getExclusiveAssignment( + const llvm::ArrayRef Vars); + +// Finds a greedy assignment of registers to variables. Each variable gets +// assigned the first possible register that is not already assigned to a +// previous variable. If there is no such register, the variable gets assigned +// the first possible register. +// The result is a vector of the same size as `Vars`, or `{}` if the +// assignment is not feasible. +std::vector getGreedyAssignment( + const llvm::ArrayRef Vars); + +// Generates an LLVM MCInst with the previously computed variables. +// Immediate values are set to 1. +llvm::MCInst generateMCInst(const llvm::MCInstrDesc& InstrDesc, + const llvm::ArrayRef Vars, + const llvm::ArrayRef VarRegs); + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_INSTRUCTIONSNIPPETGENERATOR_H_ Index: tools/llvm-exegesis/InstructionSnippetGenerator.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/InstructionSnippetGenerator.cpp @@ -0,0 +1,372 @@ +//===-- InstructionSnippetGenerator.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "InstructionSnippetGenerator.h" + +#include +#include +#include + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/MC/MCInstBuilder.h" + +namespace exegesis { + +void Variable::print(llvm::raw_ostream& OS, + const llvm::MCRegisterInfo* RegInfo) const { + OS << "IsUse=" << IsUse << " IsDef=" << IsDef << " possible regs: {"; + for (const size_t Reg : PossibleRegisters) { + if (RegInfo) { + OS << RegInfo->getName(Reg); + } else { + OS << Reg; + } + OS << ","; + } + OS << "} "; + if (ExplicitOperands.empty()) { + OS << "implicit"; + } else { + OS << "explicit ops: {"; + for (const size_t Op : ExplicitOperands) { + OS << Op << ","; + } + OS << "}"; + } + OS << "\n"; +} + +namespace { + +// Update the state of a Variable with an explicit operand. +void updateExplicitOperandVariable(const llvm::MCRegisterInfo& RegInfo, + const llvm::MCInstrDesc& InstrInfo, + const size_t op_index, + const llvm::BitVector& ReservedRegs, + Variable& Var) { + const bool IsDef = op_index < InstrInfo.getNumDefs(); + if (IsDef == true) Var.IsDef = true; + if (IsDef == false) Var.IsUse = true; + Var.ExplicitOperands.push_back(op_index); + const auto& op_info = InstrInfo.opInfo_begin()[op_index]; + if (op_info.RegClass >= 0) { + Var.IsReg = true; + for (const auto& reg : RegInfo.getRegClass(op_info.RegClass)) { + if (!ReservedRegs[reg]) { + Var.PossibleRegisters.insert(reg); + } + } + } +} + +Variable& findVariableWithOperand(llvm::SmallVector& Vars, + size_t op_index) { + // Vars.size() is small (<10) so a linear scan is good enough. + for (auto& Var : Vars) { + if (llvm::is_contained(Var.ExplicitOperands, op_index)) { + return Var; + } + } + assert(false && "Illegal state"); + static Variable* const kEmptyVariable = new Variable(); + return *kEmptyVariable; +} + +} // namespace + +llvm::SmallVector getVariables( + const llvm::MCRegisterInfo& RegInfo, const llvm::MCInstrDesc& InstrInfo, + const llvm::BitVector& ReservedRegs) { + llvm::SmallVector Vars; + // For each operand, its "tied to" operand or -1. + llvm::SmallVector tied_to_map; + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + tied_to_map.push_back( + InstrInfo.getOperandConstraint(i, llvm::MCOI::TIED_TO)); + } + // Adding non tied operands. + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + if (tied_to_map[i] >= 0) continue; // dropping tied ones. + Vars.emplace_back(); + updateExplicitOperandVariable(RegInfo, InstrInfo, i, ReservedRegs, + Vars.back()); + } + // Adding tied operands to existing variables. + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + if (tied_to_map[i] < 0) continue; // dropping non-tied ones. + updateExplicitOperandVariable( + RegInfo, InstrInfo, i, ReservedRegs, + findVariableWithOperand(Vars, tied_to_map[i])); + } + // Adding implicit defs. + for (size_t i = 0; i < InstrInfo.getNumImplicitDefs(); ++i) { + Vars.emplace_back(); + auto& Var = Vars.back(); + const auto reg = InstrInfo.getImplicitDefs()[i]; + assert(!ReservedRegs[reg]); + Var.PossibleRegisters.insert(reg); + Var.IsDef = true; + Var.IsReg = true; + } + // Adding implicit uses. + for (size_t i = 0; i < InstrInfo.getNumImplicitUses(); ++i) { + Vars.emplace_back(); + auto& Var = Vars.back(); + const auto reg = InstrInfo.getImplicitUses()[i]; + assert(!ReservedRegs[reg]); + Var.PossibleRegisters.insert(reg); + Var.IsUse = true; + Var.IsReg = true; + } + + return Vars; +} + +VariableAssignment::VariableAssignment(size_t VarIdx, + llvm::MCPhysReg AssignedReg) + : VarIdx(VarIdx), AssignedReg(AssignedReg) {} + +bool VariableAssignment::operator==(const VariableAssignment& other) const { + return std::tie(VarIdx, AssignedReg) == + std::tie(other.VarIdx, other.AssignedReg); +} + +bool VariableAssignment::operator<(const VariableAssignment& other) const { + return std::tie(VarIdx, AssignedReg) < + std::tie(other.VarIdx, other.AssignedReg); +} + +void dumpAssignmentChain(const llvm::MCRegisterInfo& RegInfo, + const AssignmentChain& Chain) { + for (const auto& Assignment : Chain) { + llvm::outs() << llvm::format("(%d %s) ", Assignment.VarIdx, + RegInfo.getName(Assignment.AssignedReg)); + } + llvm::outs() << "\n"; +} + +std::vector computeSequentialAssignmentChains( + const llvm::MCRegisterInfo& RegInfo, const llvm::ArrayRef Vars) { + using graph::Node; + graph::Graph graph; + + // Add register aliasing to the graph. + setupRegisterAliasing(RegInfo, graph); + + // Adding variables to the graph. + for (size_t i = 0; i < Vars.size(); ++i) { + const auto& Var = Vars[i]; + const Node node = Node::Var(i); + if (Var.IsDef) { + graph.connect(Node::In(), node); + for (const size_t reg : Var.PossibleRegisters) { + graph.connect(node, Node::Reg(reg)); + } + } + if (Var.IsUse) { + graph.connect(node, Node::Out()); + for (const size_t reg : Var.PossibleRegisters) { + graph.connect(Node::Reg(reg), node); + } + } + } + + // Find all possible dependency chains (aka all possible paths from In to Out + // node). + std::vector all_chains; + for (;;) { + const auto path = graph.getPathFrom(Node::In(), Node::Out()); + if (path.empty()) break; + switch (path.size()) { + case 0: + case 1: + case 2: + case 4: + assert(false && "Illegal state"); + break; + case 3: { // IN -> variable -> OUT + const size_t VarIdx = path[1].varValue(); + for (size_t reg : Vars[VarIdx].PossibleRegisters) { + all_chains.emplace_back(); + all_chains.back().emplace(VarIdx, reg); + } + graph.disconnect(path[0], path[1]); // IN -> variable + graph.disconnect(path[1], path[2]); // variable -> OUT + break; + } + default: { // IN -> var1 -> Reg[...] -> var2 -> OUT + const size_t last = path.size() - 1; + const size_t var1 = path[1].varValue(); + const llvm::MCPhysReg reg1 = path[2].regValue(); + const llvm::MCPhysReg reg2 = path[last - 2].regValue(); + const size_t var2 = path[last - 1].varValue(); + all_chains.emplace_back(); + all_chains.back().emplace(var1, reg1); + all_chains.back().emplace(var2, reg2); + graph.disconnect(path[1], path[2]); // var1 -> Reg[0] + break; + } + } + } + + return all_chains; +} + +std::vector getRandomAssignment( + const llvm::ArrayRef Vars, + const llvm::ArrayRef Chains, + const std::function& RandomIndexForSize) { + // registers are initialized with 0 (aka NoRegister). + std::vector registers(Vars.size(), 0); + if (Chains.empty()) { + return registers; + } + // Pick one of the chains and set registers that are fully constrained (have + // no degrees of freedom). + const size_t chain_index = RandomIndexForSize(Chains.size()); + for (const VariableAssignment Assignment : Chains[chain_index]) { + registers[Assignment.VarIdx] = Assignment.AssignedReg; + } + // Registers with remaining degrees of freedom are assigned randomly. + for (size_t i = 0; i < Vars.size(); ++i) { + llvm::MCPhysReg& reg = registers[i]; + const Variable& Var = Vars[i]; + const auto& possible_registers = Var.PossibleRegisters; + if (reg > 0 || possible_registers.empty()) continue; + reg = possible_registers[RandomIndexForSize(possible_registers.size())]; + } + return registers; +} + +namespace { + +// Finds a matching register `reg` for variable `VarIdx` and sets +// `reg_assignments[r]` to `VarIdx`. Returns false if no matching can be found. +// `seen.count(r)` is 1 if register `reg` has been processed. +bool findMatchingRegister( + const llvm::ArrayRef Vars, const size_t VarIdx, + std::unordered_set& Seen, + std::unordered_map& RegAssignments) { + for (const llvm::MCPhysReg Reg : Vars[VarIdx].PossibleRegisters) { + if (!Seen.count(Reg)) { + Seen.insert(Reg); // Mark `reg` as seen. + // If `reg` is not assigned to a variable, or if `reg` was assigned to a + // variable which has an alternate possible register, assign `reg` to + // variable `VarIdx`. Since `reg` is marked as assigned in the above line, + // `reg_assignments[r]` in the following recursive call will not get + // assigned `reg` again. + const auto AssignedVarIt = RegAssignments.find(Reg); + if (AssignedVarIt == RegAssignments.end() || + findMatchingRegister(Vars, AssignedVarIt->second, Seen, + RegAssignments)) { + RegAssignments[Reg] = VarIdx; + return true; + } + } + } + return false; +} + +} // namespace + +// This is actually a maximum bipartite matching problem: +// https://en.wikipedia.org/wiki/Matching_(graph_theory)#Bipartite_matching +// The graph has variables on the left and registers on the right, with an edge +// between variable `I` and register `reg` iff +// `Vars[I].PossibleRegisters.count(A)`. +// Note that a greedy approach won't work for cases like: +// Vars[0] PossibleRegisters={C,B} +// Vars[1] PossibleRegisters={A,B} +// Vars[2] PossibleRegisters={A,C} +// There is a feasible solution {0->B, 1->A, 2->C}, but the greedy solution is +// {0->C, 1->A, oops}. +std::vector getExclusiveAssignment( + const llvm::ArrayRef Vars) { + // `reg_assignments[r]` is the variable id that was assigned register `reg`. + std::unordered_map reg_assignments; + + for (size_t VarIdx = 0; VarIdx < Vars.size(); ++VarIdx) { + if (!Vars[VarIdx].IsReg) { + continue; + } + std::unordered_set seen; + if (!findMatchingRegister(Vars, VarIdx, seen, reg_assignments)) { + return {}; // Infeasible. + } + } + + std::vector registers(Vars.size(), 0); + for (const auto& reg_VarIdx : reg_assignments) { + registers[reg_VarIdx.second] = reg_VarIdx.first; + } + return registers; +} + +std::vector getGreedyAssignment( + const llvm::ArrayRef Vars) { + std::vector registers(Vars.size(), 0); + llvm::SmallSet assigned; + for (size_t VarIdx = 0; VarIdx < Vars.size(); ++VarIdx) { + const auto& var = Vars[VarIdx]; + if (!var.IsReg) { + continue; + } + if (var.PossibleRegisters.empty()) { + return {}; + } + // Try possible registers until an unassigned one is found. + for (const auto reg : var.PossibleRegisters) { + if (assigned.insert(reg).second) { + registers[VarIdx] = reg; + break; + } + } + // Fallback to first possible register. + if (registers[VarIdx] == 0) { + registers[VarIdx] = var.PossibleRegisters[0]; + } + } + return registers; +} + +llvm::MCInst generateMCInst(const llvm::MCInstrDesc& InstrInfo, + const llvm::ArrayRef Vars, + const llvm::ArrayRef VarRegs) { + const size_t num_operands = InstrInfo.getNumOperands(); + llvm::SmallVector operand_to_register(num_operands, 0); + + // We browse the variable and for each explicit operands we set the selected + // register in the operand_to_register array. + for (size_t i = 0; i < Vars.size(); ++i) { + for (const size_t operand_index : Vars[i].ExplicitOperands) { + operand_to_register[operand_index] = VarRegs[i]; + } + } + + // Building the instruction. + llvm::MCInstBuilder builder(InstrInfo.getOpcode()); + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + const auto& op_info = InstrInfo.opInfo_begin()[i]; + switch (op_info.OperandType) { + case llvm::MCOI::OperandType::OPERAND_REGISTER: + builder.addReg(operand_to_register[i]); + break; + case llvm::MCOI::OperandType::OPERAND_IMMEDIATE: + builder.addImm(1); + break; + default: + builder.addOperand(llvm::MCOperand()); + } + } + + return builder; +} + +} // namespace exegesis Index: tools/llvm-exegesis/LLVMBuild.txt =================================================================== --- tools/llvm-exegesis/LLVMBuild.txt +++ tools/llvm-exegesis/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./tools/LLVMBuild.txt ------------------------------------*- Conf -*--===; +;===- ./tools/llvm-exegesis/LLVMBuild.txt ----------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,45 +15,8 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = - bugpoint - dsymutil - llc - lli - llvm-ar - llvm-as - llvm-bcanalyzer - llvm-cat - llvm-cfi-verify - llvm-cov - llvm-cvtres - llvm-diff - llvm-dis - llvm-dwarfdump - llvm-dwp - llvm-extract - llvm-jitlistener - llvm-link - llvm-lto - llvm-mc - llvm-mca - llvm-mcmarkup - llvm-modextract - llvm-mt - llvm-nm - llvm-objcopy - llvm-objdump - llvm-pdbutil - llvm-profdata - llvm-rc - llvm-rtdyld - llvm-size - llvm-split - opt - verify-uselistorder - [component_0] -type = Group -name = Tools -parent = $ROOT +type = Tool +name = llvm-exegesis +parent = Tools +required_libraries = CodeGen ExecutionEngine MC MCJIT Object Support all-targets Index: tools/llvm-exegesis/Latency.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/Latency.h @@ -0,0 +1,40 @@ +//===-- Latency.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure instruction latencies. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H_ + +#include "BenchmarkRunner.h" + +namespace exegesis { + +class LatencyBenchmarkRunner : public BenchmarkRunner { + public: + ~LatencyBenchmarkRunner() override; + + private: + const char* getDisplayName() const override; + + llvm::Expected> createCode( + const LLVMState& State, unsigned OpcodeIndex, unsigned NumRepetitions, + const JitFunctionContext& Context) const override; + + std::vector runMeasurements( + const LLVMState& State, const JitFunction& Function, + unsigned NumRepetitions) const override; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H_ Index: tools/llvm-exegesis/Latency.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/Latency.cpp @@ -0,0 +1,103 @@ +//===-- Latency.cpp ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Latency.h" + +#include +#include + +#include "BenchmarkResult.h" +#include "InstructionSnippetGenerator.h" +#include "PerfHelper.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Error.h" + +namespace exegesis { + +namespace { + +// TODO(courbet): Handle memory. +bool isInvalidOperand(const llvm::MCOperandInfo& OpInfo) { + switch (OpInfo.OperandType) { + default: + return true; + case llvm::MCOI::OPERAND_IMMEDIATE: + case llvm::MCOI::OPERAND_REGISTER: + return false; + } +} + +llvm::Error makeError(llvm::Twine Msg) { + return llvm::make_error(Msg, + llvm::inconvertibleErrorCode()); +} +} // namespace + +LatencyBenchmarkRunner::~LatencyBenchmarkRunner() {} + +const char* LatencyBenchmarkRunner::getDisplayName() const { return "latency"; } + +llvm::Expected> LatencyBenchmarkRunner::createCode( + const LLVMState& State, const unsigned OpcodeIndex, + const unsigned NumRepetitions, const JitFunctionContext& Context) const { + std::default_random_engine RandomEngine; + const auto GetRandomIndex = [&RandomEngine](size_t Size) { + assert(Size > 0); + return std::uniform_int_distribution<>(0, Size - 1)(RandomEngine); + }; + + const auto& InstrInfo = State.getInstrInfo(); + const auto& RegInfo = State.getRegInfo(); + const llvm::MCInstrDesc& InstrDesc = InstrInfo.get(OpcodeIndex); + for (const llvm::MCOperandInfo& OpInfo : InstrDesc.operands()) { + if (isInvalidOperand(OpInfo)) { + return makeError("Only registers and immediates are supported"); + } + } + + const auto Vars = getVariables(RegInfo, InstrDesc, Context.getReservedRegs()); + const auto AssignmentChains = + computeSequentialAssignmentChains(RegInfo, Vars); + if (AssignmentChains.empty()) { + return makeError("Unable to find a dependency chain."); + } + const auto Regs = getRandomAssignment(Vars, AssignmentChains, GetRandomIndex); + const auto Inst = generateMCInst(InstrDesc, Vars, Regs); + if (!State.canAssemble(Inst)) { + return makeError("MCInst does not assemble."); + } + return std::vector(NumRepetitions, Inst); +} + +std::vector LatencyBenchmarkRunner::runMeasurements( + const LLVMState& State, const JitFunction& Function, + const unsigned NumRepetitions) const { + // Cycle measurements include some overhead from the kernel. Repeat the + // measure several times and take the minimum value. + constexpr const int kNumMeasurements = 30; + int64_t MinLatency = std::numeric_limits::max(); + // TODO(courbet): This should be a property of the MCSchedModel. + const pfm::PerfEvent CyclesPerfEvent("UNHALTED_CORE_CYCLES"); + if (!CyclesPerfEvent.valid()) { + llvm::report_fatal_error("invalid perf event 'UNHALTED_CORE_CYCLES'"); + } + for (size_t I = 0; I < kNumMeasurements; ++I) { + pfm::Counter Counter(CyclesPerfEvent); + Counter.start(); + Function(); + Counter.stop(); + const auto Value = Counter.read(); + if (Value < MinLatency) { + MinLatency = Value; + } + } + return {{"latency", static_cast(MinLatency) / NumRepetitions}}; +} + +} // namespace exegesis Index: tools/llvm-exegesis/LlvmState.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/LlvmState.h @@ -0,0 +1,60 @@ +//===-- LlvmState.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H_ + +#include +#include + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" + +namespace exegesis { + +// An object to initialize LLVM and prepare objects needed to run the +// measurements. +class LLVMState { + public: + LLVMState(); + + const std::string& getTriple() const; + + const std::string& getCpuName() const; + + const std::string& getFeatures() const; + + const llvm::MCSubtargetInfo& getSubtargetInfo() const; + + const llvm::MCInstrInfo& getInstrInfo() const; + + const llvm::MCRegisterInfo& getRegInfo() const; + + std::unique_ptr createTargetMachine() const; + + bool canAssemble(const llvm::MCInst& mc_inst) const; + + private: + std::string TheTriple; + std::string CpuName; + std::string Features; + const llvm::Target* TheTarget = nullptr; + std::unique_ptr SubtargetInfo; + std::unique_ptr InstrInfo; + std::unique_ptr RegInfo; + std::unique_ptr AsmInfo; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H_ Index: tools/llvm-exegesis/LlvmState.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/LlvmState.cpp @@ -0,0 +1,69 @@ +//===-- LlvmState.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "LlvmState.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +namespace exegesis { + +LLVMState::LLVMState() + : TheTriple(llvm::sys::getProcessTriple()), + CpuName(llvm::sys::getHostCPUName().str()) { + std::string Error; + TheTarget = llvm::TargetRegistry::lookupTarget(TheTriple, Error); + assert(TheTarget); + SubtargetInfo.reset( + TheTarget->createMCSubtargetInfo(TheTriple, CpuName, Features)); + InstrInfo.reset(TheTarget->createMCInstrInfo()); + RegInfo.reset(TheTarget->createMCRegInfo(TheTriple)); + AsmInfo.reset(TheTarget->createMCAsmInfo(*RegInfo, TheTriple)); +} + +const std::string& LLVMState::getTriple() const { return TheTriple; } +const std::string& LLVMState::getCpuName() const { return CpuName; } +const std::string& LLVMState::getFeatures() const { return Features; } + +const llvm::MCInstrInfo& LLVMState::getInstrInfo() const { return *InstrInfo; } + +const llvm::MCRegisterInfo& LLVMState::getRegInfo() const { return *RegInfo; } + +const llvm::MCSubtargetInfo& LLVMState::getSubtargetInfo() const { + return *SubtargetInfo; +} + +std::unique_ptr LLVMState::createTargetMachine() + const { + const llvm::TargetOptions Options; + return std::unique_ptr( + static_cast(TheTarget->createTargetMachine( + TheTriple, CpuName, Features, Options, llvm::Reloc::Model::Static))); +} + +bool LLVMState::canAssemble(const llvm::MCInst& mc_inst) const { + llvm::MCObjectFileInfo ObjectFileInfo; + llvm::MCContext Context(AsmInfo.get(), RegInfo.get(), &ObjectFileInfo); + std::unique_ptr CodeEmitter( + TheTarget->createMCCodeEmitter(*InstrInfo, *RegInfo, Context)); + llvm::SmallVector Tmp; + llvm::raw_svector_ostream OS(Tmp); + llvm::SmallVector Fixups; + CodeEmitter->encodeInstruction(mc_inst, OS, Fixups, *SubtargetInfo); + return Tmp.size() > 0; +} + +} // namespace exegesis Index: tools/llvm-exegesis/OperandGraph.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/OperandGraph.h @@ -0,0 +1,90 @@ +//===-- OperandGraph.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A collection of tools to model register aliasing and instruction operand. +/// This is used to find an aliasing between the input and output registers of +/// an instruction. It allows us to repeat an instruction and make sure that +/// successive instances are executed sequentially. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_OPERANDGRAPH_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_OPERANDGRAPH_H_ + +#include +#include +#include +#include + +#include "llvm/MC/MCRegisterInfo.h" + +namespace exegesis { +namespace graph { + +enum class NodeType { + VARIABLE, // An set of "tied together operands" to resolve. + REG, // A particular register. + IN, // The input node. + OUT // The output node. +}; + +// A Node in the graph, it has a type and an int value. +struct Node : public std::pair { + using std::pair::pair; + + static Node Reg(int Value) { return {NodeType::REG, Value}; } + static Node Var(int Value) { return {NodeType::VARIABLE, Value}; } + static Node In() { return {NodeType::IN, 0}; } + static Node Out() { return {NodeType::OUT, 0}; } + + NodeType type() const; + int regValue() const; // checks that type==REG and returns value. + int varValue() const; // checks that type==VARIABLE and returns value. + + void dump(const llvm::MCRegisterInfo& RegInfo) const; +}; + +// Graph represents the connectivity of registers for a particular instruction. +// This object is used to select registers that would create a dependency chain +// between instruction's input and output. +struct Graph { + public: + void connect(const Node From, const Node To); + void disconnect(const Node From, const Node To); + + // Tries to find a path between 'From' and 'To' nodes. + // Returns empty if no path is found. + std::vector getPathFrom(const Node From, const Node To) const; + + private: + // We use std::set to keep the implementation simple, using an unordered_set + // requires the definition of a hasher. + using NodeSet = std::set; + + // Performs a Depth First Search from 'current' node up until 'sentinel' node + // is found. 'path' is the recording of the traversed nodes, 'seen' is the + // collection of nodes seen so far. + bool dfs(const Node Current, const Node Sentinel, std::vector& Path, + NodeSet& Seen) const; + + // We use std::map to keep the implementation simple, using an unordered_map + // requires the definition of a hasher. + std::map AdjacencyLists; +}; + +// Add register nodes to graph and connect them when they alias. Connection is +// both ways. +void setupRegisterAliasing(const llvm::MCRegisterInfo& RegInfo, + Graph& TheGraph); + +} // namespace graph +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_OPERANDGRAPH_H_ Index: tools/llvm-exegesis/OperandGraph.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/OperandGraph.cpp @@ -0,0 +1,113 @@ +//===-- OperandGraph.cpp ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "OperandGraph.h" + +#include "llvm/MC/MCRegisterInfo.h" + +namespace exegesis { +namespace graph { + +void Node::dump(const llvm::MCRegisterInfo& RegInfo) const { + switch (type()) { + case NodeType::VARIABLE: + printf(" %d", varValue()); + break; + case NodeType::REG: + printf(" %s", RegInfo.getName(regValue())); + break; + case NodeType::IN: + printf(" IN"); + break; + case NodeType::OUT: + printf(" OUT"); + break; + } +} + +NodeType Node::type() const { return first; } + +int Node::regValue() const { + assert(first == NodeType::REG); + return second; +} + +int Node::varValue() const { + assert(first == NodeType::VARIABLE); + return second; +} + +void Graph::connect(const Node From, const Node To) { + AdjacencyLists[From].insert(To); +} + +void Graph::disconnect(const Node From, const Node To) { + AdjacencyLists[From].erase(To); +} + +std::vector Graph::getPathFrom(const Node From, const Node To) const { + std::vector Path; + NodeSet Seen; + dfs(From, To, Path, Seen); + return Path; +} + +// DFS is implemented recursively, this is fine as graph size is small (~250 +// nodes, ~200 edges, longuest path depth < 10). +bool Graph::dfs(const Node Current, const Node Sentinel, + std::vector& Path, NodeSet& Seen) const { + Path.push_back(Current); + Seen.insert(Current); + if (Current == Sentinel) return true; + if (AdjacencyLists.count(Current)) { + for (const auto Next : AdjacencyLists.find(Current)->second) { + if (Seen.count(Next)) continue; + if (dfs(Next, Sentinel, Path, Seen)) return true; + } + } + Path.pop_back(); + return false; +} + +// For each Register Units we walk up their parents. +// Let's take the case of the A register family: +// +// RAX +// ^ +// EAX +// ^ +// AX +// ^ ^ +// AH AL +// +// Register Units are AH and AL. +// Walking them up gives the following lists: +// AH->AX->EAX->RAX and AL->AX->EAX->RAX +// When walking the lists we add connect current to parent both ways leading to +// the following connections: +// +// AL<->AX, AH<->AX, AX<->EAX, EAX<->RAX +// We repeat this process for all Unit Registers to cover all connections. +void setupRegisterAliasing(const llvm::MCRegisterInfo& RegInfo, + Graph& TheGraph) { + using SuperItr = llvm::MCSuperRegIterator; + for (size_t Reg = 0; Reg < RegInfo.getNumRegUnits(); ++Reg) { + auto Current = Reg; + for (SuperItr Super(Reg, &RegInfo); Super.isValid(); ++Super) { + const Node A = Node::Reg(Current); + const Node B = Node::Reg(*Super); + TheGraph.connect(A, B); + TheGraph.connect(B, A); + Current = *Super; + } + } +} + +} // namespace graph +} // namespace exegesis Index: tools/llvm-exegesis/PerfHelper.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/PerfHelper.h @@ -0,0 +1,101 @@ +//===-- PerfHelper.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Helpers for measuring perf events. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H_ + +#include + +#include "perfmon/perf_event.h" +#include "perfmon/pfmlib.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace exegesis { +namespace pfm { + +// Simple function to test for errors. +bool IsPfmError(int Code); + +// Retrieves the encoding for the event described by pfm_event_string. +// NOTE: pfm_initialize() must be called before creating PerfEvent objects. +class PerfEvent { + public: + // http://perfmon2.sourceforge.net/manv4/libpfm.html + // Events are expressed as strings. e.g. "INSTRUCTION_RETIRED" + explicit PerfEvent(llvm::StringRef pfm_event_string); + + PerfEvent(const PerfEvent&) = delete; + PerfEvent(PerfEvent&& other) = default; + + // The pfm_event_string passed at construction time. + const char* name() const; + + // Whether the event was successfully created. + bool valid() const; + + // The encoded event to be passed to the Kernel. + // Precondition: valid() must be true. + const perf_event_attr& attribute() const; + + // The fully qualified name for the event. + // e.g. "snb_ep::INSTRUCTION_RETIRED:e=0:i=0:c=0:t=0:u=1:k=0:mg=0:mh=1" + const std::string& getPfmEventString() const; + + private: + const std::string EventString; + std::string FullQualifiedEventString; + perf_event_attr Attr = {}; +}; + +// Uses a valid PerfEvent to configure the Kernel so we can measure the +// underlying event. +struct Counter { + // event: the PerfEvent to measure. + explicit Counter(const PerfEvent& event); + + Counter(const Counter&) = delete; + Counter(Counter&& other) = default; + + ~Counter(); + + void start(); // Starts the measurement of the event. + void stop(); // Stops the measurement of the event. + int64_t read() const; // Return the current value of the counter. + + private: + int FileDescriptor = -1; +}; + +// Helper to measure a list of PerfEvent for a particular function. +// callback is called for each successful measure (PerfEvent needs to be valid). +template +void Measure( + const llvm::ArrayRef Events, + const std::function& Callback, + Function Fn) { + for (const auto& Event : Events) { + if (!Event.valid()) continue; + Counter Cnt(Event); + Cnt.start(); + Fn(); + Cnt.stop(); + Callback(Event, Cnt.read()); + } +} + +} // namespace pfm +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H_ Index: tools/llvm-exegesis/PerfHelper.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/PerfHelper.cpp @@ -0,0 +1,77 @@ +//===-- PerfHelper.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PerfHelper.h" + +#include "perfmon/pfmlib_perf_event.h" +#include "llvm/Support/raw_ostream.h" + +namespace exegesis { +namespace pfm { + +bool IsPfmError(int Code) { return Code != PFM_SUCCESS; } + +PerfEvent::PerfEvent(llvm::StringRef PfmEventString) + : EventString(PfmEventString.str()) { + char* fstr = nullptr; + pfm_perf_encode_arg_t arg = {}; + arg.attr = &Attr; + arg.fstr = &fstr; + arg.size = sizeof(pfm_perf_encode_arg_t); + const int result = pfm_get_os_event_encoding(EventString.c_str(), PFM_PLM3, + PFM_OS_PERF_EVENT, &arg); + if (IsPfmError(result)) { + // We don't know beforehand which counters are available (e.g. 6 uops ports + // on Sandybridge but 8 on Haswell) so we report the missing counter without + // crashing. + llvm::errs() << pfm_strerror(result) << " - cannot create event " + << EventString; + } + if (fstr) { + FullQualifiedEventString = fstr; + free(fstr); + } +} + +const char* PerfEvent::name() const { return EventString.c_str(); } + +bool PerfEvent::valid() const { return !FullQualifiedEventString.empty(); } + +const perf_event_attr& PerfEvent::attribute() const { return Attr; } + +const std::string& PerfEvent::getPfmEventString() const { + return FullQualifiedEventString; +} + +Counter::Counter(const PerfEvent& Event) { + const pid_t Pid = 0; // measure current process/thread. + const int Cpu = -1; // measure any processor. + const int GroupFd = -1; // no grouping of counters. + const uint32_t Flags = 0; + perf_event_attr AttrCopy = Event.attribute(); + FileDescriptor = perf_event_open(&AttrCopy, Pid, Cpu, GroupFd, Flags); + assert(FileDescriptor != -1 && + "Unable to open event, make sure your kernel allows user space perf " + "monitoring."); +} + +Counter::~Counter() { close(FileDescriptor); } + +void Counter::start() { ioctl(FileDescriptor, PERF_EVENT_IOC_RESET, 0); } + +void Counter::stop() { ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); } + +int64_t Counter::read() const { + int64_t Count = 0; + ::read(FileDescriptor, &Count, sizeof(Count)); + return Count; +} + +} // namespace pfm +} // namespace exegesis Index: tools/llvm-exegesis/README.txt =================================================================== --- /dev/null +++ tools/llvm-exegesis/README.txt @@ -0,0 +1 @@ +A Index: tools/llvm-exegesis/Uops.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/Uops.h @@ -0,0 +1,40 @@ +//===-- Uops.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure uop decomposition. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H_ + +#include "BenchmarkRunner.h" + +namespace exegesis { + +class UopsBenchmarkRunner : public BenchmarkRunner { + public: + ~UopsBenchmarkRunner() override; + + private: + const char* getDisplayName() const override; + + llvm::Expected> createCode( + const LLVMState& State, unsigned OpcodeIndex, unsigned NumRepetitions, + const JitFunctionContext& Context) const override; + + std::vector runMeasurements( + const LLVMState& State, const JitFunction& Function, + unsigned NumRepetitions) const override; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H_ Index: tools/llvm-exegesis/Uops.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/Uops.cpp @@ -0,0 +1,260 @@ +//===-- Uops.cpp ------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Uops.h" + +#include +#include +#include +#include + +#include "BenchmarkResult.h" +#include "InstructionSnippetGenerator.h" +#include "PerfHelper.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Error.h" + +namespace exegesis { + +namespace { + +// TODO(courbet): Handle memory. +bool isInvalidOperand(const llvm::MCOperandInfo& OpInfo) { + switch (OpInfo.OperandType) { + default: + return true; + case llvm::MCOI::OPERAND_IMMEDIATE: + case llvm::MCOI::OPERAND_REGISTER: + return false; + } +} + +llvm::Error makeError(llvm::Twine Msg) { + return llvm::make_error(Msg, + llvm::inconvertibleErrorCode()); +} + +// TODO(courbet): Remove this. Counter names should be optional properties of +// ProcResUnits. +const std::string* getEventNameFromProcResName(const char* ProcResName) { + static const std::unordered_map Entries = { + {"SBPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"SBPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"SBPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"SBPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"HWPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"HWPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"HWPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, + {"HWPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, + {"HWPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"HWPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"HWPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, + {"HWPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, + {"SKLPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"SKLPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"SKLPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, + {"SKLPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, + {"SKLPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"SKLPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"SKLPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, + {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, + {"SKXPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"SKXPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"SKXPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, + {"SKXPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, + {"SKXPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"SKXPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"SKXPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, + {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, + }; + const auto It = Entries.find(ProcResName); + return It == Entries.end() ? nullptr : &It->second; +} + +std::vector generateIndependentAssignments( + const LLVMState& State, const llvm::MCInstrDesc& InstrDesc, + llvm::SmallVector Vars, int MaxAssignments) { + std::unordered_set IsUsedByAnyVar; + for (const auto& Var : Vars) { + if (Var.IsUse) { + IsUsedByAnyVar.insert(Var.PossibleRegisters.begin(), + Var.PossibleRegisters.end()); + } + } + + std::vector pattern; + for (int A = 0; A < MaxAssignments; ++A) { + // TODO(courbet): This is a bit pessimistic. We should get away with an + // assignment that ensures that the set of assigned registers for uses and + // the set of assigned registers for defs do not intersect (registers + // for uses (resp defs) do not have to be all distinct). + const auto Regs = getExclusiveAssignment(Vars); + if (Regs.empty()) { + break; + } + // Remove all assigned registers defs that are used by at least one other + // variable from the list of possible variable registers. This ensures that + // we never create a RAW hazard that would lead to serialization. + for (size_t i = 0; i < Vars.size(); ++i) { + llvm::MCPhysReg Reg = Regs[i]; + if (Vars[i].IsDef && IsUsedByAnyVar.count(Reg)) { + Vars[i].PossibleRegisters.remove(Reg); + } + } + // Create an MCInst and check assembly. + auto Inst = generateMCInst(InstrDesc, Vars, Regs); + if (!State.canAssemble(Inst)) { + continue; + } + pattern.push_back(std::move(Inst)); + } + return pattern; +} +} // namespace + +UopsBenchmarkRunner::~UopsBenchmarkRunner() {} + +const char* UopsBenchmarkRunner::getDisplayName() const { return "uops"; } + +llvm::Expected> UopsBenchmarkRunner::createCode( + const LLVMState& State, const unsigned OpcodeIndex, + const unsigned NumRepetitions, const JitFunctionContext& Context) const { + const auto& InstrInfo = State.getInstrInfo(); + const auto& RegInfo = State.getRegInfo(); + const llvm::MCInstrDesc& InstrDesc = InstrInfo.get(OpcodeIndex); + for (const llvm::MCOperandInfo& OpInfo : InstrDesc.operands()) { + if (isInvalidOperand(OpInfo)) { + return makeError("Only registers and immediates are supported"); + } + } + + // TODO(courbet): Load constants into registers (e.g. with fld1) to not break + // instructions like x87. + + // TODO(courbet): Move that to an external doc. + // Ideally we would like the only limitation on executing uops to be the issue + // ports. Maximizing port pressure increases the likelihood that the load is + // distributed evenly across possible ports. + + // To achieve that, one approach is to generate instructions that do not have + // data dependencies between them. + // + // For some instructions, this is trivial: + // mov rax, qword ptr [rsi] + // mov rax, qword ptr [rsi] + // mov rax, qword ptr [rsi] + // mov rax, qword ptr [rsi] + // For the above snippet, haswell just renames rax four times and executes the + // four instructions two at a time on P23 and P0126. + // + // For some instructions, we just need to make sure that the source is + // different from the destination. For example, IDIV8r reads from GPR and + // writes to AX. We just need to ensure that the variable is assigned a + // register which is different from AX: + // idiv bx + // idiv bx + // idiv bx + // idiv bx + // The above snippet will be able to fully saturate the ports, while the same + // with ax would issue one uop every `latency(IDIV8r)` cycles. + // + // Some instructions make this harder because they both read and write from + // the same register: + // inc rax + // inc rax + // inc rax + // inc rax + // This has a data dependency from each instruction to the next, limit the + // number of instructions that can be issued in parallel. + // It turns out that this is not a big issue on recent Intel CPUs because they + // have heuristics to balance port pressure. In the snippet above, subsequent + // instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs + // might end up executing them all on P0 (just because they can), or try + // avoiding P5 because it's usually under high pressure from vector + // instructions. + // This issue is even more important for high-latency instructions because + // they increase the idle time of the CPU, e.g. : + // imul rax, rbx + // imul rax, rbx + // imul rax, rbx + // imul rax, rbx + // + // To avoid that, we do the renaming statically by generating as many + // independent exclusive assignments as possible (until all possible registers + // are exhausted) e.g.: + // imul rax, rbx + // imul rcx, rbx + // imul rdx, rbx + // imul r8, rbx + // + // Some instruction even make the above static renaming impossible because + // they implicitly read and write from the same operand, e.g. ADC16rr reads + // and writes from EFLAGS. + // In that case we just use a greedy register assignment and hope for the + // best. + + const auto Vars = getVariables(RegInfo, InstrDesc, Context.getReservedRegs()); + + // Generate as many independent exclusive assignments as possible. + constexpr const int kMaxStaticRenames = 20; + std::vector Pattern = + generateIndependentAssignments(State, InstrDesc, Vars, kMaxStaticRenames); + if (Pattern.empty()) { + // We don't even have a single exclusive assignment, fallback to a greedy + // assignment. + // TODO(courbet): Tell the user about this decision. + const auto Regs = getGreedyAssignment(Vars); + if (Regs.empty()) { + return makeError("No feasible greedy assignment"); + } + auto Inst = generateMCInst(InstrDesc, Vars, Regs); + if (!State.canAssemble(Inst)) { + return makeError("Cannot assemble greedy assignment"); + } + Pattern.push_back(std::move(Inst)); + } + + // Generate repetitions of the pattern until benchmark_iterations is reached. + std::vector Result; + Result.reserve(NumRepetitions); + for (unsigned i = 0; i < NumRepetitions; ++i) { + Result.push_back(Pattern[i % Pattern.size()]); + } + return Result; +} + +std::vector UopsBenchmarkRunner::runMeasurements( + const LLVMState& State, const JitFunction& Function, + const unsigned NumRepetitions) const { + const auto& SchedModel = State.getSubtargetInfo().getSchedModel(); + + std::vector Result; + for (unsigned ProcResIdx = 1; + ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) { + const llvm::MCProcResourceDesc& ProcRes = + *SchedModel.getProcResource(ProcResIdx); + const std::string* const EventName = + getEventNameFromProcResName(ProcRes.Name); + if (!EventName) { + continue; + } + pfm::Counter Counter{pfm::PerfEvent(*EventName)}; + Counter.start(); + Function(); + Counter.stop(); + Result.push_back({llvm::itostr(ProcResIdx), + static_cast(Counter.read()) / NumRepetitions, + ProcRes.Name}); + } + return Result; +} + +} // namespace exegesis Index: tools/llvm-exegesis/X86.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/X86.h @@ -0,0 +1,32 @@ +//===-- X86.h ---------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// X86 target-specific setup. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_X86_H_ +#define LLVM_TOOLS_LLVM_EXEGESIS_X86_H_ + +#include "BenchmarkRunner.h" +#include "LlvmState.h" + +namespace exegesis { + +class X86Filter : public BenchmarkRunner::InstructionFilter { + public: + ~X86Filter() override; + + llvm::Error shouldRun(const LLVMState& State, unsigned Opcode) const override; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_X86_H_ Index: tools/llvm-exegesis/X86.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/X86.cpp @@ -0,0 +1,43 @@ +//===-- X86.cpp --------------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" + +namespace exegesis { +namespace { + +llvm::Error makeError(llvm::Twine Msg) { + return llvm::make_error(Msg, + llvm::inconvertibleErrorCode()); +} + +} // namespace + +X86Filter::~X86Filter() {} + +// Test whether we can generate a snippet for this instruction. +llvm::Error X86Filter::shouldRun(const LLVMState& State, + const unsigned Opcode) const { + const auto& InstrInfo = State.getInstrInfo(); + const llvm::MCInstrDesc& InstrDesc = InstrInfo.get(Opcode); + if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) { + return makeError("Unsupported opcode: isBranch/isIndirectBranch"); + } + if (InstrDesc.isCall() || InstrDesc.isReturn()) { + return makeError("Unsupported opcode: isCall/isReturn"); + } + const auto OpcodeName = InstrInfo.getName(Opcode); + if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") || + OpcodeName.startswith("ADJCALLSTACK")) { + return makeError("Unsupported opcode: Push/Pop/AdjCallStack"); + } + return llvm::ErrorSuccess(); +} + +} // namespace exegesis Index: tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/llvm-exegesis.cpp @@ -0,0 +1,118 @@ +//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Measures execution properties (latencies/uops) of an instruction. +/// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include "BenchmarkResult.h" +#include "BenchmarkRunner.h" +#include "InMemoryAssembler.h" +#include "InstructionSnippetGenerator.h" +#include "Latency.h" +#include "LlvmState.h" +#include "OperandGraph.h" +#include "PerfHelper.h" +#include "Uops.h" +#include "X86.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" + +static llvm::cl::opt OpcodeIndex( + "opcode-index", llvm::cl::desc("opcode to measure, by index"), + llvm::cl::init(0)); + +static llvm::cl::opt OpcodeName( + "opcode-name", llvm::cl::desc("opcode to measure, by name"), + llvm::cl::init("")); + +enum class BenchmarkModeE { Latency, Uops }; +static llvm::cl::opt BenchmarkMode( + "benchmark-mode", llvm::cl::desc("the benchmark mode to run"), + llvm::cl::values( + clEnumValN(BenchmarkModeE::Latency, "latency", "Instruction Latency"), + clEnumValN(BenchmarkModeE::Uops, "uops", "Uop Decomposition"))); + +static llvm::cl::opt NumRepetitions( + "num-repetitions", + llvm::cl::desc("number of time to repeat the asm snippet"), + llvm::cl::init(10000)); + +namespace exegesis { + +void main() { + if (OpcodeName.empty() == (OpcodeIndex == 0)) { + llvm::report_fatal_error( + "please provide one and only one of 'opcode-index' or 'opcode-name' "); + } + + LLVMInitializeX86Target(); + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86AsmPrinter(); + + // FIXME: Target-specific filter. + X86Filter Filter; + + const LLVMState State; + + unsigned Opcode = OpcodeIndex; + if (Opcode == 0) { + // Resolve opcode name -> opcode. + for (unsigned i = 0; i < State.getInstrInfo().getNumOpcodes(); ++i) { + if (State.getInstrInfo().getName(i) == OpcodeName) { + Opcode = i; + break; + } + } + if (Opcode == 0) { + llvm::report_fatal_error( + llvm::Twine("unknown opcode ").concat(OpcodeName)); + } + } + + std::unique_ptr Runner; + switch (BenchmarkMode) { + case BenchmarkModeE::Latency: + Runner = llvm::make_unique(); + break; + case BenchmarkModeE::Uops: + Runner = llvm::make_unique(); + break; + } + + Runner->run(State, Opcode, NumRepetitions > 0 ? NumRepetitions : 1, Filter) + .writeYamlOrDie("-"); +} + +} // namespace exegesis + +int main(int argc, char** argv) { + llvm::cl::ParseCommandLineOptions(argc, argv, ""); + + if (exegesis::pfm::IsPfmError(pfm_initialize())) { + llvm::errs() << "cannot initialize libpfm\n"; + return EXIT_FAILURE; + } + + exegesis::main(); + + pfm_terminate(); + return EXIT_SUCCESS; +} Index: tools/llvm-exegesis/llvm-exegesis.rst =================================================================== --- /dev/null +++ tools/llvm-exegesis/llvm-exegesis.rst @@ -0,0 +1,59 @@ +llvm-exegesis - LLVM Machine Instruction Benchmark +================================================== + +SYNOPSIS +-------- + +:program:`llvm-exegesis` [*options*] + +DESCRIPTION +----------- + +:program:`llvm-exegesis` is a benchmarking tool that uses information available +in LLVM to measure host machine instruction characteristics like latency or port +decomposition. + + +Given an LLVM opcode name and a benchmarking mode, :program:`llvm-exegesis` +generates a code snippet that makes execution as serial (resp. as parallel) as +possible so that we can measure the latency (resp. uop decomposition) of the +instruction. +The code snippet is jitted and executed on the host subtarget. The time taken +(resp. resource usage) is measured using hardware performance counters. The +result is printed out as YAML to the standard output. + +The main goal of this tool is to automatically (in)validate the LLVM's TableDef +scheduling models. + +OPTIONS +------- + +.. option:: -help + + Print a summary of command line options. + +.. option:: -opcode-index= + + Specify the opcode to measure, by index. + Either `opcode-index` or `opcode-name` must be set. + +.. option:: -opcode-name= + + Specify the opcode to measure, by name. + Either `opcode-index` or `opcode-name` must be set. + +.. option:: -benchmark-mode=[Latency|Uops] + + Specify which characteristic of the opcode to measure. + +.. option:: -num-repetitions= + + Specify the number of repetitions of the asm snippet. + Higher values lead to more accurate measurements but lengthen the benchmark. + + +EXIT STATUS +----------- + +:program:`llvm-exegesis` returns 0 on success. Otherwise, an error message is +printed to standard error, and the tool returns a non 0 value.