Index: cmake/config-ix.cmake =================================================================== --- cmake/config-ix.cmake +++ cmake/config-ix.cmake @@ -115,6 +115,7 @@ endif() check_library_exists(dl dlopen "" HAVE_LIBDL) check_library_exists(rt clock_gettime "" HAVE_LIBRT) + check_library_exists(pfm pfm_initialize "" HAVE_LIBPFM) endif() if(HAVE_LIBPTHREAD) Index: docs/CommandGuide/index.rst =================================================================== --- docs/CommandGuide/index.rst +++ docs/CommandGuide/index.rst @@ -53,5 +53,6 @@ tblgen lit llvm-build + llvm-exegesis llvm-pdbutil llvm-readobj Index: docs/CommandGuide/llvm-exegesis.rst =================================================================== --- /dev/null +++ docs/CommandGuide/llvm-exegesis.rst @@ -0,0 +1,58 @@ +llvm-exegesis - LLVM Machine Instruction Benchmark +================================================== + +SYNOPSIS +-------- + +:program:`llvm-exegesis` [*options*] + +DESCRIPTION +----------- + +:program:`llvm-exegesis` is a benchmarking tool that uses information available +in LLVM to measure host machine instruction characteristics like latency or port +decomposition. + +Given an LLVM opcode name and a benchmarking mode, :program:`llvm-exegesis` +generates a code snippet that makes execution as serial (resp. as parallel) as +possible so that we can measure the latency (resp. uop decomposition) of the +instruction. +The code snippet is jitted and executed on the host subtarget. The time taken +(resp. resource usage) is measured using hardware performance counters. The +result is printed out as YAML to the standard output. + +The main goal of this tool is to automatically (in)validate the LLVM's TableDef +scheduling models. + +OPTIONS +------- + +.. option:: -help + + Print a summary of command line options. + +.. option:: -opcode-index= + + Specify the opcode to measure, by index. + Either `opcode-index` or `opcode-name` must be set. + +.. option:: -opcode-name= + + Specify the opcode to measure, by name. + Either `opcode-index` or `opcode-name` must be set. + +.. option:: -benchmark-mode=[Latency|Uops] + + Specify which characteristic of the opcode to measure. + +.. option:: -num-repetitions= + + Specify the number of repetitions of the asm snippet. + Higher values lead to more accurate measurements but lengthen the benchmark. + + +EXIT STATUS +----------- + +:program:`llvm-exegesis` returns 0 on success. Otherwise, an error message is +printed to standard error, and the tool returns a non 0 value. Index: docs/ReleaseNotes.rst =================================================================== --- docs/ReleaseNotes.rst +++ docs/ReleaseNotes.rst @@ -42,6 +42,9 @@ * The LoopInstSimplify pass (-loop-instsimplify) has been removed. +* A new ``llvm-exegesis`` tool has been added to automatically measure + instruction scheduling properties (latency/uops). + * Note.. .. NOTE Index: include/llvm/Config/config.h.cmake =================================================================== --- include/llvm/Config/config.h.cmake +++ include/llvm/Config/config.h.cmake @@ -107,6 +107,9 @@ /* Define to 1 if you have the `edit' library (-ledit). */ #cmakedefine HAVE_LIBEDIT ${HAVE_LIBEDIT} +/* Define to 1 if you have the `pfm' library (-lpfm). */ +#cmakedefine HAVE_LIBPFM ${HAVE_LIBPFM} + /* Define to 1 if you have the `psapi' library (-lpsapi). */ #cmakedefine HAVE_LIBPSAPI ${HAVE_LIBPSAPI} Index: tools/LLVMBuild.txt =================================================================== --- tools/LLVMBuild.txt +++ tools/LLVMBuild.txt @@ -32,6 +32,7 @@ llvm-dis llvm-dwarfdump llvm-dwp + llvm-exegesis llvm-extract llvm-jitlistener llvm-link Index: tools/llvm-exegesis/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-exegesis/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmPrinters + AllTargetsDescs + AllTargetsInfos + X86 + ) + +add_llvm_tool(llvm-exegesis + llvm-exegesis.cpp + ) + +add_subdirectory(lib) +target_link_libraries(llvm-exegesis PRIVATE LLVMExegesis) + +if(HAVE_LIBPFM) + target_link_libraries(llvm-exegesis PRIVATE pfm) +endif() + Index: tools/llvm-exegesis/LLVMBuild.txt =================================================================== --- tools/llvm-exegesis/LLVMBuild.txt +++ tools/llvm-exegesis/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./tools/LLVMBuild.txt ------------------------------------*- Conf -*--===; +;===- ./tools/llvm-exegesis/LLVMBuild.txt ----------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,45 +15,8 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = - bugpoint - dsymutil - llc - lli - llvm-ar - llvm-as - llvm-bcanalyzer - llvm-cat - llvm-cfi-verify - llvm-cov - llvm-cvtres - llvm-diff - llvm-dis - llvm-dwarfdump - llvm-dwp - llvm-extract - llvm-jitlistener - llvm-link - llvm-lto - llvm-mc - llvm-mca - llvm-mcmarkup - llvm-modextract - llvm-mt - llvm-nm - llvm-objcopy - llvm-objdump - llvm-pdbutil - llvm-profdata - llvm-rc - llvm-rtdyld - llvm-size - llvm-split - opt - verify-uselistorder - [component_0] -type = Group -name = Tools -parent = $ROOT +type = Tool +name = llvm-exegesis +parent = Tools +required_libraries = CodeGen ExecutionEngine MC MCJIT Object Support all-targets Index: tools/llvm-exegesis/lib/BenchmarkResult.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/BenchmarkResult.h @@ -0,0 +1,53 @@ +//===-- BenchmarkResult.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines classes to represent measurements and serialize/deserialize them to +// Yaml. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H +#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include + +namespace exegesis { + +struct AsmTemplate { + std::string Name; +}; + +struct BenchmarkMeasure { + std::string Key; + double Value; + std::string DebugString; +}; + +// The result of an instruction benchmark. +struct InstructionBenchmark { + AsmTemplate AsmTmpl; + std::string CpuName; + std::string LLVMTriple; + size_t NumRepetitions = 0; + std::vector Measurements; + std::string Error; + + static InstructionBenchmark readYamlOrDie(llvm::StringRef Filename); + + // Unfortunately this function is non const because of YAML traits. + void writeYamlOrDie(const llvm::StringRef Filename); +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H Index: tools/llvm-exegesis/lib/BenchmarkResult.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/BenchmarkResult.cpp @@ -0,0 +1,85 @@ +//===-- BenchmarkResult.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkResult.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +// Defining YAML traits for IO. +namespace llvm { +namespace yaml { + +// std::vector will be rendered as a list. +template <> struct SequenceElementTraits { + static const bool flow = false; +}; + +// exegesis::Measure is rendererd as a flow instead of a list. +// e.g. { "key": "the key", "value": 0123 } +template <> struct MappingTraits { + static void mapping(IO &Io, exegesis::BenchmarkMeasure &Obj) { + Io.mapRequired("key", Obj.Key); + Io.mapRequired("value", Obj.Value); + Io.mapOptional("debug_string", Obj.DebugString); + } + static const bool flow = true; +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, exegesis::AsmTemplate &Obj) { + Io.mapRequired("name", Obj.Name); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, exegesis::InstructionBenchmark &Obj) { + Io.mapRequired("asm_template", Obj.AsmTmpl); + Io.mapRequired("cpu_name", Obj.CpuName); + Io.mapRequired("llvm_triple", Obj.LLVMTriple); + Io.mapRequired("num_repetitions", Obj.NumRepetitions); + Io.mapRequired("measurements", Obj.Measurements); + Io.mapRequired("error", Obj.Error); + } +}; + +} // namespace yaml +} // namespace llvm + +namespace exegesis { + +InstructionBenchmark +InstructionBenchmark::readYamlOrDie(llvm::StringRef Filename) { + std::unique_ptr MemBuffer = llvm::cantFail( + llvm::errorOrToExpected(llvm::MemoryBuffer::getFile(Filename))); + llvm::yaml::Input Yin(*MemBuffer); + InstructionBenchmark Benchmark; + Yin >> Benchmark; + return Benchmark; +} + +void InstructionBenchmark::writeYamlOrDie(const llvm::StringRef Filename) { + if (Filename == "-") { + llvm::yaml::Output Yout(llvm::outs()); + Yout << *this; + } else { + llvm::SmallString<1024> Buffer; + llvm::raw_svector_ostream Ostr(Buffer); + llvm::yaml::Output Yout(Ostr); + Yout << *this; + std::unique_ptr File = + llvm::cantFail(llvm::FileOutputBuffer::create(Filename, Buffer.size())); + memcpy(File->getBufferStart(), Buffer.data(), Buffer.size()); + llvm::cantFail(File->commit()); + } +} + +} // namespace exegesis Index: tools/llvm-exegesis/lib/BenchmarkRunner.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -0,0 +1,64 @@ +//===-- BenchmarkRunner.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the abstract BenchmarkRunner class for measuring a certain execution +/// property of instructions (e.g. latency). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H + +#include "BenchmarkResult.h" +#include "InMemoryAssembler.h" +#include "LlvmState.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Error.h" +#include + +namespace exegesis { + +// Common code for all benchmark modes. +class BenchmarkRunner { +public: + // Subtargets can disable running benchmarks for some instructions by + // returning an error here. + class InstructionFilter { + public: + virtual ~InstructionFilter(); + + virtual llvm::Error shouldRun(const LLVMState &State, + unsigned Opcode) const { + return llvm::ErrorSuccess(); + } + }; + + virtual ~BenchmarkRunner(); + + InstructionBenchmark run(const LLVMState &State, unsigned Opcode, + unsigned NumRepetitions, + const InstructionFilter &Filter) const; + +private: + virtual const char *getDisplayName() const = 0; + + virtual llvm::Expected> + createCode(const LLVMState &State, unsigned OpcodeIndex, + unsigned NumRepetitions, + const JitFunctionContext &Context) const = 0; + + virtual std::vector + runMeasurements(const LLVMState &State, const JitFunction &Function, + unsigned NumRepetitions) const = 0; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -0,0 +1,79 @@ +//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkRunner.h" +#include "InMemoryAssembler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include + +namespace exegesis { + +BenchmarkRunner::InstructionFilter::~InstructionFilter() = default; + +BenchmarkRunner::~BenchmarkRunner() = default; + +InstructionBenchmark +BenchmarkRunner::run(const LLVMState &State, const unsigned Opcode, + unsigned NumRepetitions, + const InstructionFilter &Filter) const { + InstructionBenchmark InstrBenchmark; + + InstrBenchmark.AsmTmpl.Name = + llvm::Twine(getDisplayName()) + .concat(" ") + .concat(State.getInstrInfo().getName(Opcode)) + .str(); + InstrBenchmark.CpuName = State.getCpuName(); + InstrBenchmark.LLVMTriple = State.getTriple(); + InstrBenchmark.NumRepetitions = NumRepetitions; + + // Ignore instructions that we cannot run. + if (State.getInstrInfo().get(Opcode).isPseudo()) { + InstrBenchmark.Error = "Unsupported opcode: isPseudo"; + return InstrBenchmark; + } + if (llvm::Error E = Filter.shouldRun(State, Opcode)) { + InstrBenchmark.Error = llvm::toString(std::move(E)); + return InstrBenchmark; + } + + JitFunctionContext Context(State.createTargetMachine()); + auto ExpectedInstructions = + createCode(State, Opcode, NumRepetitions, Context); + if (llvm::Error E = ExpectedInstructions.takeError()) { + InstrBenchmark.Error = llvm::toString(std::move(E)); + return InstrBenchmark; + } + + const std::vector Instructions = *ExpectedInstructions; + const JitFunction Function(std::move(Context), Instructions); + const llvm::StringRef CodeBytes = Function.getFunctionBytes(); + + std::string AsmExcerpt; + constexpr const int kExcerptSize = 100; + constexpr const int kExcerptTailSize = 10; + if (CodeBytes.size() <= kExcerptSize) { + AsmExcerpt = llvm::toHex(CodeBytes); + } else { + AsmExcerpt = + llvm::toHex(CodeBytes.take_front(kExcerptSize - kExcerptTailSize + 3)); + AsmExcerpt += "..."; + AsmExcerpt += llvm::toHex(CodeBytes.take_back(kExcerptTailSize)); + } + llvm::outs() << "# Asm excerpt: " << AsmExcerpt << "\n"; + llvm::outs().flush(); // In case we crash. + + InstrBenchmark.Measurements = + runMeasurements(State, Function, NumRepetitions); + return InstrBenchmark; +} + +} // namespace exegesis Index: tools/llvm-exegesis/lib/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/CMakeLists.txt @@ -0,0 +1,25 @@ +add_library(LLVMExegesis + STATIC + BenchmarkResult.cpp + BenchmarkRunner.cpp + InMemoryAssembler.cpp + InstructionSnippetGenerator.cpp + Latency.cpp + LlvmState.cpp + OperandGraph.cpp + PerfHelper.cpp + Uops.cpp + X86.cpp + ) + +llvm_update_compile_flags(LLVMExegesis) +llvm_map_components_to_libnames(libs + CodeGen + ExecutionEngine + MC + MCJIT + Support + ) + +target_link_libraries(LLVMExegesis ${libs}) +set_target_properties(LLVMExegesis PROPERTIES FOLDER "Libraries") Index: tools/llvm-exegesis/lib/InMemoryAssembler.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/InMemoryAssembler.h @@ -0,0 +1,78 @@ +//===-- InMemoryAssembler.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines classes to assemble functions composed of a single basic block of +/// MCInsts. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_INMEMORYASSEMBLER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_INMEMORYASSEMBLER_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCInst.h" +#include +#include + +namespace exegesis { + +// Consumable context for JitFunction below. +// This temporary object allows for retrieving MachineFunction properties before +// assembling it. +class JitFunctionContext { +public: + explicit JitFunctionContext(std::unique_ptr TM); + // Movable + JitFunctionContext(JitFunctionContext &&) = default; + JitFunctionContext &operator=(JitFunctionContext &&) = default; + // Non copyable + JitFunctionContext(const JitFunctionContext &) = delete; + JitFunctionContext &operator=(const JitFunctionContext &) = delete; + + const llvm::BitVector &getReservedRegs() const { return ReservedRegs; } + +private: + friend class JitFunction; + + std::unique_ptr Context; + std::unique_ptr TM; + std::unique_ptr MMI; + std::unique_ptr Module; + llvm::MachineFunction *MF = nullptr; + llvm::BitVector ReservedRegs; +}; + +// Creates a void() function from a sequence of llvm::MCInst. +class JitFunction { +public: + // Assembles Instructions into an executable function. + JitFunction(JitFunctionContext &&Context, + llvm::ArrayRef Instructions); + + // Retrieves the function as an array of bytes. + llvm::StringRef getFunctionBytes() const { return FunctionBytes; } + + // Retrieves the callable function. + void operator()() const { ((void (*)())FunctionBytes.data())(); } + +private: + JitFunctionContext FunctionContext; + std::unique_ptr ExecEngine; + llvm::StringRef FunctionBytes; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_INMEMORYASSEMBLER_H Index: tools/llvm-exegesis/lib/InMemoryAssembler.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/InMemoryAssembler.cpp @@ -0,0 +1,231 @@ +//===-- InMemoryAssembler.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InMemoryAssembler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/MCJIT.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/PassInfo.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +namespace exegesis { + +static constexpr const char kModuleID[] = "ExegesisInfoTest"; +static constexpr const char kFunctionID[] = "foo"; + +// Small utility function to add named passes. +static bool addPass(llvm::PassManagerBase &PM, llvm::StringRef PassName, + llvm::TargetPassConfig &TPC) { + const llvm::PassRegistry *PR = llvm::PassRegistry::getPassRegistry(); + const llvm::PassInfo *PI = PR->getPassInfo(PassName); + if (!PI) { + llvm::errs() << " run-pass " << PassName << " is not registered.\n"; + return true; + } + + llvm::Pass *P; + if (PI->getNormalCtor()) { + P = PI->getNormalCtor()(); + } else { + llvm::errs() << " cannot create pass: " << PI->getPassName() << "\n"; + return true; + } + std::string Banner = std::string("After ") + std::string(P->getPassName()); + PM.add(P); + TPC.printAndVerify(Banner); + + return false; +} + +// Creates a void MachineFunction with no argument. +static llvm::MachineFunction & +createVoidVoidMachineFunction(llvm::StringRef FunctionID, llvm::Module *Module, + llvm::MachineModuleInfo *MMI) { + llvm::Type *const ReturnType = llvm::Type::getInt32Ty(Module->getContext()); + llvm::FunctionType *FunctionType = llvm::FunctionType::get(ReturnType, false); + llvm::Function *const F = llvm::Function::Create( + FunctionType, llvm::GlobalValue::InternalLinkage, FunctionID, Module); + // Making sure we can create a MachineFunction out of this Function even if it + // contains no IR. + F->setIsMaterializable(true); + return MMI->getOrCreateMachineFunction(*F); +} + +static llvm::object::OwningBinary +assemble(llvm::Module *Module, std::unique_ptr MMI, + llvm::LLVMTargetMachine *LLVMTM) { + llvm::legacy::PassManager PM; + llvm::MCContext &Context = MMI->getContext(); + + llvm::TargetLibraryInfoImpl TLII(llvm::Triple(Module->getTargetTriple())); + PM.add(new llvm::TargetLibraryInfoWrapperPass(TLII)); + + llvm::TargetPassConfig *TPC = LLVMTM->createPassConfig(PM); + PM.add(TPC); + PM.add(MMI.release()); + TPC->printAndVerify("MachineFunctionGenerator::assemble"); + // Adding the following passes: + // - machineverifier: checks that the MachineFunction is well formed. + // - prologepilog: saves and restore callee saved registers. + for (const char *PassName : {"machineverifier", "prologepilog"}) { + if (addPass(PM, PassName, *TPC)) { + llvm::report_fatal_error("Unable to add a mandatory pass"); + } + } + TPC->setInitialized(); + + llvm::SmallVector AsmBuffer; + llvm::raw_svector_ostream AsmStream(AsmBuffer); + // AsmPrinter is responsible for generating the assembly into AsmBuffer. + if (LLVMTM->addAsmPrinter(PM, AsmStream, llvm::TargetMachine::CGFT_ObjectFile, + Context)) { + llvm::report_fatal_error("Cannot add AsmPrinter passes"); + } + + PM.run(*Module); // Run all the passes + + // Storing the generated assembly into a MemoryBuffer that owns the memory. + std::unique_ptr Buffer = + llvm::MemoryBuffer::getMemBufferCopy(AsmStream.str()); + // Create the ObjectFile from the MemoryBuffer. + std::unique_ptr Obj = llvm::cantFail( + llvm::object::ObjectFile::createObjectFile(Buffer->getMemBufferRef())); + // Returning both the MemoryBuffer and the ObjectFile. + return llvm::object::OwningBinary( + std::move(Obj), std::move(Buffer)); +} + +static void fillMachineFunction(llvm::MachineFunction &MF, + llvm::ArrayRef Instructions) { + llvm::MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); + MF.push_back(MBB); + const llvm::MCInstrInfo *MCII = MF.getTarget().getMCInstrInfo(); + const llvm::DebugLoc DL; + for (const llvm::MCInst &Inst : Instructions) { + const unsigned Opcode = Inst.getOpcode(); + const llvm::MCInstrDesc &MCID = MCII->get(Opcode); + llvm::MachineInstrBuilder Builder = llvm::BuildMI(MBB, DL, MCID); + for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { + const llvm::MCOperand &Op = Inst.getOperand(OpIndex); + if (Op.isReg()) { + const bool isDef = OpIndex < MCID.getNumDefs(); + unsigned Flags = 0; + const llvm::MCOperandInfo &OpInfo = MCID.operands().begin()[OpIndex]; + if (isDef && !OpInfo.isOptionalDef()) { + Flags |= llvm::RegState::Define; + } + Builder.addReg(Op.getReg(), Flags); + } else if (Op.isImm()) { + Builder.addImm(Op.getImm()); + } else { + llvm_unreachable("Not yet implemented"); + } + } + } + // Adding the Return Opcode. + const llvm::TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + llvm::BuildMI(MBB, DL, TII->get(TII->getReturnOpcode())); +} + +namespace { + +// Implementation of this class relies on the fact that a single object with a +// single function will be loaded into memory. +class TrackingSectionMemoryManager : public llvm::SectionMemoryManager { +public: + explicit TrackingSectionMemoryManager(uintptr_t *CodeSize) + : CodeSize(CodeSize) {} + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + llvm::StringRef SectionName) override { + *CodeSize = Size; + return llvm::SectionMemoryManager::allocateCodeSection( + Size, Alignment, SectionID, SectionName); + } + +private: + uintptr_t *const CodeSize = nullptr; +}; + +} // namespace + +JitFunctionContext::JitFunctionContext( + std::unique_ptr TheTM) + : Context(llvm::make_unique()), TM(std::move(TheTM)), + MMI(llvm::make_unique(TM.get())), + Module(llvm::make_unique(kModuleID, *Context)) { + Module->setDataLayout(TM->createDataLayout()); + MF = &createVoidVoidMachineFunction(kFunctionID, Module.get(), MMI.get()); + // We need to instruct the passes that we're done with SSA and virtual + // registers. + auto &Properties = MF->getProperties(); + Properties.set(llvm::MachineFunctionProperties::Property::NoVRegs); + Properties.reset(llvm::MachineFunctionProperties::Property::IsSSA); + Properties.reset(llvm::MachineFunctionProperties::Property::TracksLiveness); + // prologue/epilogue pass needs the reserved registers to be frozen, this is + // usually done by the SelectionDAGISel pass. + MF->getRegInfo().freezeReservedRegs(*MF); + // Saving reserved registers for client. + ReservedRegs = MF->getSubtarget().getRegisterInfo()->getReservedRegs(*MF); +} + +JitFunction::JitFunction(JitFunctionContext &&Context, + llvm::ArrayRef Instructions) + : FunctionContext(std::move(Context)) { + fillMachineFunction(*FunctionContext.MF, Instructions); + // We create the pass manager, run the passes and returns the produced + // ObjectFile. + llvm::object::OwningBinary ObjHolder = + assemble(FunctionContext.Module.get(), std::move(FunctionContext.MMI), + FunctionContext.TM.get()); + assert(ObjHolder.getBinary()); + // Initializing the execution engine. + // We need to use the JIT EngineKind to be able to add an object file. + LLVMLinkInMCJIT(); + uintptr_t CodeSize = 0; + std::string Error; + ExecEngine.reset( + llvm::EngineBuilder(std::move(FunctionContext.Module)) + .setErrorStr(&Error) + .setMCPU(FunctionContext.TM->getTargetCPU()) + .setEngineKind(llvm::EngineKind::JIT) + .setMCJITMemoryManager( + llvm::make_unique(&CodeSize)) + .create(FunctionContext.TM.release())); + if (!ExecEngine) + llvm::report_fatal_error(Error); + // Adding the generated object file containing the assembled function. + // The ExecutionEngine makes sure the object file is copied into an + // executable page. + ExecEngine->addObjectFile(ObjHolder.takeBinary().first); + // Setting function + FunctionBytes = + llvm::StringRef(reinterpret_cast( + ExecEngine->getFunctionAddress(kFunctionID)), + CodeSize); +} + +} // namespace exegesis Index: tools/llvm-exegesis/lib/InstructionSnippetGenerator.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/InstructionSnippetGenerator.h @@ -0,0 +1,119 @@ +//===-- InstructionSnippetGenerator.h ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines helper classes to generate code snippets, in particular register +/// assignment. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_INSTRUCTIONSNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_INSTRUCTIONSNIPPETGENERATOR_H + +#include "OperandGraph.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include + +namespace exegesis { + +// A Variable represents a set of possible values that we need to choose from. +// It may represent one or more explicit operands that are tied together, or one +// implicit operand. +class Variable final { +public: + bool IsUse = false; + bool IsDef = false; + bool IsReg = false; + + // Lists all the explicit operand indices that are tied to this variable. + // Empty if Variable represents an implicit operand. + llvm::SmallVector ExplicitOperands; + + // - In case of explicit operands, PossibleRegisters is the expansion of the + // operands's RegClass registers. Please note that tied together explicit + // operands share the same RegClass. + // - In case of implicit operands, PossibleRegisters is a singleton MCPhysReg. + llvm::SmallSetVector PossibleRegisters; + + // If RegInfo is null, register names won't get resolved. + void print(llvm::raw_ostream &OS, const llvm::MCRegisterInfo *RegInfo) const; +}; + +// Builds a model of implicit and explicit operands for InstrDesc into +// Variables. +llvm::SmallVector +getVariables(const llvm::MCRegisterInfo &RegInfo, + const llvm::MCInstrDesc &InstrDesc, + const llvm::BitVector &ReservedRegs); + +// A simple object to represent a Variable assignement. +struct VariableAssignment { + VariableAssignment(size_t VarIdx, llvm::MCPhysReg AssignedReg); + + size_t VarIdx; + llvm::MCPhysReg AssignedReg; + + bool operator==(const VariableAssignment &) const; + bool operator<(const VariableAssignment &) const; +}; + +// An AssignmentChain is a set of assignement realizing a dependency chain. +// We inherit from std::set to leverage uniqueness of elements. +using AssignmentChain = std::set; + +// Debug function to print an assignment chain. +void dumpAssignmentChain(const llvm::MCRegisterInfo &RegInfo, + const AssignmentChain &Chain); + +// Inserts Variables into a graph representing register aliasing and finds all +// the possible dependency chains for this instruction, i.e. all the possible +// assignement of operands that would make execution of the instruction +// sequential. +std::vector +computeSequentialAssignmentChains(const llvm::MCRegisterInfo &RegInfo, + const llvm::ArrayRef Vars); + +// Selects a random configuration leading to a dependency chain. +// The result is a vector of the same size as `Vars`. +// `random_index_for_size` is a functor giving a random value in [0, arg[. +std::vector +getRandomAssignment(const llvm::ArrayRef Vars, + const llvm::ArrayRef Chains, + const std::function &RandomIndexForSize); + +// Finds an assignment of registers to variables such that no two variables are +// assigned the same register. +// The result is a vector of the same size as `Vars`, or `{}` if the +// assignment is not feasible. +std::vector +getExclusiveAssignment(const llvm::ArrayRef Vars); + +// Finds a greedy assignment of registers to variables. Each variable gets +// assigned the first possible register that is not already assigned to a +// previous variable. If there is no such register, the variable gets assigned +// the first possible register. +// The result is a vector of the same size as `Vars`, or `{}` if the +// assignment is not feasible. +std::vector +getGreedyAssignment(const llvm::ArrayRef Vars); + +// Generates an LLVM MCInst with the previously computed variables. +// Immediate values are set to 1. +llvm::MCInst generateMCInst(const llvm::MCInstrDesc &InstrDesc, + const llvm::ArrayRef Vars, + const llvm::ArrayRef VarRegs); + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_INSTRUCTIONSNIPPETGENERATOR_H Index: tools/llvm-exegesis/lib/InstructionSnippetGenerator.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/InstructionSnippetGenerator.cpp @@ -0,0 +1,359 @@ +//===-- InstructionSnippetGenerator.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InstructionSnippetGenerator.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/MC/MCInstBuilder.h" +#include +#include +#include + +namespace exegesis { + +void Variable::print(llvm::raw_ostream &OS, + const llvm::MCRegisterInfo *RegInfo) const { + OS << "IsUse=" << IsUse << " IsDef=" << IsDef << " possible regs: {"; + for (const size_t Reg : PossibleRegisters) { + if (RegInfo) { + OS << RegInfo->getName(Reg); + } else { + OS << Reg; + } + OS << ","; + } + OS << "} "; + if (ExplicitOperands.empty()) { + OS << "implicit"; + } else { + OS << "explicit ops: {"; + for (const size_t Op : ExplicitOperands) { + OS << Op << ","; + } + OS << "}"; + } + OS << "\n"; +} + +// Update the state of a Variable with an explicit operand. +static void updateExplicitOperandVariable(const llvm::MCRegisterInfo &RegInfo, + const llvm::MCInstrDesc &InstrInfo, + const size_t op_index, + const llvm::BitVector &ReservedRegs, + Variable &Var) { + const bool IsDef = op_index < InstrInfo.getNumDefs(); + if (IsDef == true) + Var.IsDef = true; + if (IsDef == false) + Var.IsUse = true; + Var.ExplicitOperands.push_back(op_index); + const llvm::MCOperandInfo &op_info = InstrInfo.opInfo_begin()[op_index]; + if (op_info.RegClass >= 0) { + Var.IsReg = true; + for (const llvm::MCPhysReg ® : RegInfo.getRegClass(op_info.RegClass)) { + if (!ReservedRegs[reg]) + Var.PossibleRegisters.insert(reg); + } + } +} + +static Variable &findVariableWithOperand(llvm::SmallVector &Vars, + size_t op_index) { + // Vars.size() is small (<10) so a linear scan is good enough. + for (Variable &Var : Vars) { + if (llvm::is_contained(Var.ExplicitOperands, op_index)) + return Var; + } + assert(false && "Illegal state"); + static Variable *const kEmptyVariable = new Variable(); + return *kEmptyVariable; +} + +llvm::SmallVector +getVariables(const llvm::MCRegisterInfo &RegInfo, + const llvm::MCInstrDesc &InstrInfo, + const llvm::BitVector &ReservedRegs) { + llvm::SmallVector Vars; + // For each operand, its "tied to" operand or -1. + llvm::SmallVector tied_to_map; + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + tied_to_map.push_back( + InstrInfo.getOperandConstraint(i, llvm::MCOI::TIED_TO)); + } + // Adding non tied operands. + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + if (tied_to_map[i] >= 0) + continue; // dropping tied ones. + Vars.emplace_back(); + updateExplicitOperandVariable(RegInfo, InstrInfo, i, ReservedRegs, + Vars.back()); + } + // Adding tied operands to existing variables. + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + if (tied_to_map[i] < 0) + continue; // dropping non-tied ones. + updateExplicitOperandVariable( + RegInfo, InstrInfo, i, ReservedRegs, + findVariableWithOperand(Vars, tied_to_map[i])); + } + // Adding implicit defs. + for (size_t i = 0; i < InstrInfo.getNumImplicitDefs(); ++i) { + Vars.emplace_back(); + Variable &Var = Vars.back(); + const llvm::MCPhysReg reg = InstrInfo.getImplicitDefs()[i]; + assert(!ReservedRegs[reg]); + Var.PossibleRegisters.insert(reg); + Var.IsDef = true; + Var.IsReg = true; + } + // Adding implicit uses. + for (size_t i = 0; i < InstrInfo.getNumImplicitUses(); ++i) { + Vars.emplace_back(); + Variable &Var = Vars.back(); + const llvm::MCPhysReg reg = InstrInfo.getImplicitUses()[i]; + assert(!ReservedRegs[reg]); + Var.PossibleRegisters.insert(reg); + Var.IsUse = true; + Var.IsReg = true; + } + + return Vars; +} + +VariableAssignment::VariableAssignment(size_t VarIdx, + llvm::MCPhysReg AssignedReg) + : VarIdx(VarIdx), AssignedReg(AssignedReg) {} + +bool VariableAssignment::operator==(const VariableAssignment &other) const { + return std::tie(VarIdx, AssignedReg) == + std::tie(other.VarIdx, other.AssignedReg); +} + +bool VariableAssignment::operator<(const VariableAssignment &other) const { + return std::tie(VarIdx, AssignedReg) < + std::tie(other.VarIdx, other.AssignedReg); +} + +void dumpAssignmentChain(const llvm::MCRegisterInfo &RegInfo, + const AssignmentChain &Chain) { + for (const VariableAssignment &Assignment : Chain) { + llvm::outs() << llvm::format("(%d %s) ", Assignment.VarIdx, + RegInfo.getName(Assignment.AssignedReg)); + } + llvm::outs() << "\n"; +} + +std::vector +computeSequentialAssignmentChains(const llvm::MCRegisterInfo &RegInfo, + const llvm::ArrayRef Vars) { + using graph::Node; + graph::Graph graph; + + // Add register aliasing to the graph. + setupRegisterAliasing(RegInfo, graph); + + // Adding variables to the graph. + for (size_t i = 0; i < Vars.size(); ++i) { + const Variable &Var = Vars[i]; + const Node node = Node::Var(i); + if (Var.IsDef) { + graph.connect(Node::In(), node); + for (const size_t reg : Var.PossibleRegisters) + graph.connect(node, Node::Reg(reg)); + } + if (Var.IsUse) { + graph.connect(node, Node::Out()); + for (const size_t reg : Var.PossibleRegisters) + graph.connect(Node::Reg(reg), node); + } + } + + // Find all possible dependency chains (aka all possible paths from In to Out + // node). + std::vector all_chains; + for (;;) { + const auto path = graph.getPathFrom(Node::In(), Node::Out()); + if (path.empty()) + break; + switch (path.size()) { + case 0: + case 1: + case 2: + case 4: + assert(false && "Illegal state"); + break; + case 3: { // IN -> variable -> OUT + const size_t VarIdx = path[1].varValue(); + for (size_t reg : Vars[VarIdx].PossibleRegisters) { + all_chains.emplace_back(); + all_chains.back().emplace(VarIdx, reg); + } + graph.disconnect(path[0], path[1]); // IN -> variable + graph.disconnect(path[1], path[2]); // variable -> OUT + break; + } + default: { // IN -> var1 -> Reg[...] -> var2 -> OUT + const size_t last = path.size() - 1; + const size_t var1 = path[1].varValue(); + const llvm::MCPhysReg reg1 = path[2].regValue(); + const llvm::MCPhysReg reg2 = path[last - 2].regValue(); + const size_t var2 = path[last - 1].varValue(); + all_chains.emplace_back(); + all_chains.back().emplace(var1, reg1); + all_chains.back().emplace(var2, reg2); + graph.disconnect(path[1], path[2]); // var1 -> Reg[0] + break; + } + } + } + + return all_chains; +} + +std::vector +getRandomAssignment(const llvm::ArrayRef Vars, + const llvm::ArrayRef Chains, + const std::function &RandomIndexForSize) { + // registers are initialized with 0 (aka NoRegister). + std::vector registers(Vars.size(), 0); + if (Chains.empty()) + return registers; + // Pick one of the chains and set registers that are fully constrained (have + // no degrees of freedom). + const size_t chain_index = RandomIndexForSize(Chains.size()); + for (const VariableAssignment Assignment : Chains[chain_index]) + registers[Assignment.VarIdx] = Assignment.AssignedReg; + // Registers with remaining degrees of freedom are assigned randomly. + for (size_t i = 0; i < Vars.size(); ++i) { + llvm::MCPhysReg ® = registers[i]; + const Variable &Var = Vars[i]; + const auto &possible_registers = Var.PossibleRegisters; + if (reg > 0 || possible_registers.empty()) + continue; + reg = possible_registers[RandomIndexForSize(possible_registers.size())]; + } + return registers; +} + +// Finds a matching register `reg` for variable `VarIdx` and sets +// `RegAssignments[r]` to `VarIdx`. Returns false if no matching can be found. +// `seen.count(r)` is 1 if register `reg` has been processed. +static bool findMatchingRegister( + const llvm::ArrayRef Vars, const size_t VarIdx, + std::unordered_set &Seen, + std::unordered_map &RegAssignments) { + for (const llvm::MCPhysReg Reg : Vars[VarIdx].PossibleRegisters) { + if (!Seen.count(Reg)) { + Seen.insert(Reg); // Mark `reg` as seen. + // If `reg` is not assigned to a variable, or if `reg` was assigned to a + // variable which has an alternate possible register, assign `reg` to + // variable `VarIdx`. Since `reg` is marked as assigned in the above line, + // `RegAssignments[r]` in the following recursive call will not get + // assigned `reg` again. + const auto AssignedVarIt = RegAssignments.find(Reg); + if (AssignedVarIt == RegAssignments.end() || + findMatchingRegister(Vars, AssignedVarIt->second, Seen, + RegAssignments)) { + RegAssignments[Reg] = VarIdx; + return true; + } + } + } + return false; +} + +// This is actually a maximum bipartite matching problem: +// https://en.wikipedia.org/wiki/Matching_(graph_theory)#Bipartite_matching +// The graph has variables on the left and registers on the right, with an edge +// between variable `I` and register `reg` iff +// `Vars[I].PossibleRegisters.count(A)`. +// Note that a greedy approach won't work for cases like: +// Vars[0] PossibleRegisters={C,B} +// Vars[1] PossibleRegisters={A,B} +// Vars[2] PossibleRegisters={A,C} +// There is a feasible solution {0->B, 1->A, 2->C}, but the greedy solution is +// {0->C, 1->A, oops}. +std::vector +getExclusiveAssignment(const llvm::ArrayRef Vars) { + // `RegAssignments[r]` is the variable id that was assigned register `reg`. + std::unordered_map RegAssignments; + + for (size_t VarIdx = 0; VarIdx < Vars.size(); ++VarIdx) { + if (!Vars[VarIdx].IsReg) + continue; + std::unordered_set Seen; + if (!findMatchingRegister(Vars, VarIdx, Seen, RegAssignments)) + return {}; // Infeasible. + } + + std::vector Registers(Vars.size(), 0); + for (const auto &Reg_VarIdx : RegAssignments) + Registers[Reg_VarIdx.second] = Reg_VarIdx.first; + return Registers; +} + +std::vector +getGreedyAssignment(const llvm::ArrayRef Vars) { + std::vector Registers(Vars.size(), 0); + llvm::SmallSet Assigned; + for (size_t VarIdx = 0; VarIdx < Vars.size(); ++VarIdx) { + const auto &Var = Vars[VarIdx]; + if (!Var.IsReg) + continue; + if (Var.PossibleRegisters.empty()) + return {}; + // Try possible registers until an unassigned one is found. + for (const auto Reg : Var.PossibleRegisters) { + if (Assigned.insert(Reg).second) { + Registers[VarIdx] = Reg; + break; + } + } + // Fallback to first possible register. + if (Registers[VarIdx] == 0) + Registers[VarIdx] = Var.PossibleRegisters[0]; + } + return Registers; +} + +llvm::MCInst generateMCInst(const llvm::MCInstrDesc &InstrInfo, + const llvm::ArrayRef Vars, + const llvm::ArrayRef VarRegs) { + const size_t num_operands = InstrInfo.getNumOperands(); + llvm::SmallVector operand_to_register(num_operands, 0); + + // We browse the variable and for each explicit operands we set the selected + // register in the operand_to_register array. + for (size_t i = 0; i < Vars.size(); ++i) { + for (const size_t operand_index : Vars[i].ExplicitOperands) { + operand_to_register[operand_index] = VarRegs[i]; + } + } + + // Building the instruction. + llvm::MCInstBuilder builder(InstrInfo.getOpcode()); + for (size_t i = 0; i < InstrInfo.getNumOperands(); ++i) { + const llvm::MCOperandInfo &op_info = InstrInfo.opInfo_begin()[i]; + switch (op_info.OperandType) { + case llvm::MCOI::OperandType::OPERAND_REGISTER: + builder.addReg(operand_to_register[i]); + break; + case llvm::MCOI::OperandType::OPERAND_IMMEDIATE: + builder.addImm(1); + break; + default: + builder.addOperand(llvm::MCOperand()); + } + } + + return builder; +} + +} // namespace exegesis Index: tools/llvm-exegesis/lib/LLVMBuild.txt =================================================================== --- tools/llvm-exegesis/lib/LLVMBuild.txt +++ tools/llvm-exegesis/lib/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./tools/LLVMBuild.txt ------------------------------------*- Conf -*--===; +;===- ./tools/llvm-exegesis/lib/LLVMBuild.txt ------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,45 +15,8 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = - bugpoint - dsymutil - llc - lli - llvm-ar - llvm-as - llvm-bcanalyzer - llvm-cat - llvm-cfi-verify - llvm-cov - llvm-cvtres - llvm-diff - llvm-dis - llvm-dwarfdump - llvm-dwp - llvm-extract - llvm-jitlistener - llvm-link - llvm-lto - llvm-mc - llvm-mca - llvm-mcmarkup - llvm-modextract - llvm-mt - llvm-nm - llvm-objcopy - llvm-objdump - llvm-pdbutil - llvm-profdata - llvm-rc - llvm-rtdyld - llvm-size - llvm-split - opt - verify-uselistorder - [component_0] -type = Group -name = Tools -parent = $ROOT +type = Library +name = Exegesis +parent = Libraries +required_libraries = CodeGen ExecutionEngine MC MCJIT Object Support Index: tools/llvm-exegesis/lib/Latency.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/Latency.h @@ -0,0 +1,41 @@ +//===-- Latency.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure instruction latencies. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H +#define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H + +#include "BenchmarkRunner.h" + +namespace exegesis { + +class LatencyBenchmarkRunner : public BenchmarkRunner { +public: + ~LatencyBenchmarkRunner() override; + +private: + const char *getDisplayName() const override; + + llvm::Expected> + createCode(const LLVMState &State, unsigned OpcodeIndex, + unsigned NumRepetitions, + const JitFunctionContext &Context) const override; + + std::vector + runMeasurements(const LLVMState &State, const JitFunction &Function, + unsigned NumRepetitions) const override; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/Latency.cpp @@ -0,0 +1,95 @@ +//===-- Latency.cpp ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkResult.h" +#include "InstructionSnippetGenerator.h" +#include "Latency.h" +#include "PerfHelper.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace exegesis { + +// TODO(courbet): Handle memory. +static bool isInvalidOperand(const llvm::MCOperandInfo &OpInfo) { + switch (OpInfo.OperandType) { + default: + return true; + case llvm::MCOI::OPERAND_IMMEDIATE: + case llvm::MCOI::OPERAND_REGISTER: + return false; + } +} + +static llvm::Error makeError(llvm::Twine Msg) { + return llvm::make_error(Msg, + llvm::inconvertibleErrorCode()); +} + +LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; + +const char *LatencyBenchmarkRunner::getDisplayName() const { return "latency"; } + +llvm::Expected> LatencyBenchmarkRunner::createCode( + const LLVMState &State, const unsigned OpcodeIndex, + const unsigned NumRepetitions, const JitFunctionContext &Context) const { + std::default_random_engine RandomEngine; + const auto GetRandomIndex = [&RandomEngine](size_t Size) { + assert(Size > 0); + return std::uniform_int_distribution<>(0, Size - 1)(RandomEngine); + }; + + const auto &InstrInfo = State.getInstrInfo(); + const auto &RegInfo = State.getRegInfo(); + const llvm::MCInstrDesc &InstrDesc = InstrInfo.get(OpcodeIndex); + for (const llvm::MCOperandInfo &OpInfo : InstrDesc.operands()) { + if (isInvalidOperand(OpInfo)) + return makeError("Only registers and immediates are supported"); + } + + const auto Vars = getVariables(RegInfo, InstrDesc, Context.getReservedRegs()); + const std::vector AssignmentChains = + computeSequentialAssignmentChains(RegInfo, Vars); + if (AssignmentChains.empty()) + return makeError("Unable to find a dependency chain."); + const std::vector Regs = + getRandomAssignment(Vars, AssignmentChains, GetRandomIndex); + const llvm::MCInst Inst = generateMCInst(InstrDesc, Vars, Regs); + if (!State.canAssemble(Inst)) + return makeError("MCInst does not assemble."); + return std::vector(NumRepetitions, Inst); +} + +std::vector +LatencyBenchmarkRunner::runMeasurements(const LLVMState &State, + const JitFunction &Function, + const unsigned NumRepetitions) const { + // Cycle measurements include some overhead from the kernel. Repeat the + // measure several times and take the minimum value. + constexpr const int kNumMeasurements = 30; + int64_t MinLatency = std::numeric_limits::max(); + // TODO(courbet): This should be a property of the MCSchedModel. + const pfm::PerfEvent CyclesPerfEvent("UNHALTED_CORE_CYCLES"); + if (!CyclesPerfEvent.valid()) + llvm::report_fatal_error("invalid perf event 'UNHALTED_CORE_CYCLES'"); + for (size_t I = 0; I < kNumMeasurements; ++I) { + pfm::Counter Counter(CyclesPerfEvent); + Counter.start(); + Function(); + Counter.stop(); + const int64_t Value = Counter.read(); + if (Value < MinLatency) + MinLatency = Value; + } + return {{"latency", static_cast(MinLatency) / NumRepetitions}}; +} + +} // namespace exegesis Index: tools/llvm-exegesis/lib/LlvmState.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/LlvmState.h @@ -0,0 +1,59 @@ +//===-- LlvmState.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H +#define LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" +#include +#include + +namespace exegesis { + +// An object to initialize LLVM and prepare objects needed to run the +// measurements. +class LLVMState { +public: + LLVMState(); + + const std::string &getTriple() const; + + const std::string &getCpuName() const; + + const std::string &getFeatures() const; + + const llvm::MCSubtargetInfo &getSubtargetInfo() const; + + const llvm::MCInstrInfo &getInstrInfo() const; + + const llvm::MCRegisterInfo &getRegInfo() const; + + std::unique_ptr createTargetMachine() const; + + bool canAssemble(const llvm::MCInst &mc_inst) const; + +private: + std::string TheTriple; + std::string CpuName; + std::string Features; + const llvm::Target *TheTarget = nullptr; + std::unique_ptr SubtargetInfo; + std::unique_ptr InstrInfo; + std::unique_ptr RegInfo; + std::unique_ptr AsmInfo; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H Index: tools/llvm-exegesis/lib/LlvmState.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/LlvmState.cpp @@ -0,0 +1,68 @@ +//===-- LlvmState.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "LlvmState.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +namespace exegesis { + +LLVMState::LLVMState() + : TheTriple(llvm::sys::getProcessTriple()), + CpuName(llvm::sys::getHostCPUName().str()) { + std::string Error; + TheTarget = llvm::TargetRegistry::lookupTarget(TheTriple, Error); + assert(TheTarget); + SubtargetInfo.reset( + TheTarget->createMCSubtargetInfo(TheTriple, CpuName, Features)); + InstrInfo.reset(TheTarget->createMCInstrInfo()); + RegInfo.reset(TheTarget->createMCRegInfo(TheTriple)); + AsmInfo.reset(TheTarget->createMCAsmInfo(*RegInfo, TheTriple)); +} + +const std::string &LLVMState::getTriple() const { return TheTriple; } +const std::string &LLVMState::getCpuName() const { return CpuName; } +const std::string &LLVMState::getFeatures() const { return Features; } + +const llvm::MCInstrInfo &LLVMState::getInstrInfo() const { return *InstrInfo; } + +const llvm::MCRegisterInfo &LLVMState::getRegInfo() const { return *RegInfo; } + +const llvm::MCSubtargetInfo &LLVMState::getSubtargetInfo() const { + return *SubtargetInfo; +} + +std::unique_ptr +LLVMState::createTargetMachine() const { + const llvm::TargetOptions Options; + return std::unique_ptr( + static_cast(TheTarget->createTargetMachine( + TheTriple, CpuName, Features, Options, llvm::Reloc::Model::Static))); +} + +bool LLVMState::canAssemble(const llvm::MCInst &mc_inst) const { + llvm::MCObjectFileInfo ObjectFileInfo; + llvm::MCContext Context(AsmInfo.get(), RegInfo.get(), &ObjectFileInfo); + std::unique_ptr CodeEmitter( + TheTarget->createMCCodeEmitter(*InstrInfo, *RegInfo, Context)); + llvm::SmallVector Tmp; + llvm::raw_svector_ostream OS(Tmp); + llvm::SmallVector Fixups; + CodeEmitter->encodeInstruction(mc_inst, OS, Fixups, *SubtargetInfo); + return Tmp.size() > 0; +} + +} // namespace exegesis Index: tools/llvm-exegesis/lib/OperandGraph.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/OperandGraph.h @@ -0,0 +1,89 @@ +//===-- OperandGraph.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A collection of tools to model register aliasing and instruction operand. +/// This is used to find an aliasing between the input and output registers of +/// an instruction. It allows us to repeat an instruction and make sure that +/// successive instances are executed sequentially. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_OPERANDGRAPH_H +#define LLVM_TOOLS_LLVM_EXEGESIS_OPERANDGRAPH_H + +#include "llvm/MC/MCRegisterInfo.h" +#include +#include +#include +#include + +namespace exegesis { +namespace graph { + +enum class NodeType { + VARIABLE, // An set of "tied together operands" to resolve. + REG, // A particular register. + IN, // The input node. + OUT // The output node. +}; + +// A Node in the graph, it has a type and an int value. +struct Node : public std::pair { + using std::pair::pair; + + static Node Reg(int Value) { return {NodeType::REG, Value}; } + static Node Var(int Value) { return {NodeType::VARIABLE, Value}; } + static Node In() { return {NodeType::IN, 0}; } + static Node Out() { return {NodeType::OUT, 0}; } + + NodeType type() const; + int regValue() const; // checks that type==REG and returns value. + int varValue() const; // checks that type==VARIABLE and returns value. + + void dump(const llvm::MCRegisterInfo &RegInfo) const; +}; + +// Graph represents the connectivity of registers for a particular instruction. +// This object is used to select registers that would create a dependency chain +// between instruction's input and output. +struct Graph { +public: + void connect(const Node From, const Node To); + void disconnect(const Node From, const Node To); + + // Tries to find a path between 'From' and 'To' nodes. + // Returns empty if no path is found. + std::vector getPathFrom(const Node From, const Node To) const; + +private: + // We use std::set to keep the implementation simple, using an unordered_set + // requires the definition of a hasher. + using NodeSet = std::set; + + // Performs a Depth First Search from 'current' node up until 'sentinel' node + // is found. 'path' is the recording of the traversed nodes, 'seen' is the + // collection of nodes seen so far. + bool dfs(const Node Current, const Node Sentinel, std::vector &Path, + NodeSet &Seen) const; + + // We use std::map to keep the implementation simple, using an unordered_map + // requires the definition of a hasher. + std::map AdjacencyLists; +}; + +// Add register nodes to graph and connect them when they alias. Connection is +// both ways. +void setupRegisterAliasing(const llvm::MCRegisterInfo &RegInfo, + Graph &TheGraph); + +} // namespace graph +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_OPERANDGRAPH_H Index: tools/llvm-exegesis/lib/OperandGraph.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/OperandGraph.cpp @@ -0,0 +1,115 @@ +//===-- OperandGraph.cpp ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "OperandGraph.h" +#include "llvm/MC/MCRegisterInfo.h" + +namespace exegesis { +namespace graph { + +void Node::dump(const llvm::MCRegisterInfo &RegInfo) const { + switch (type()) { + case NodeType::VARIABLE: + printf(" %d", varValue()); + break; + case NodeType::REG: + printf(" %s", RegInfo.getName(regValue())); + break; + case NodeType::IN: + printf(" IN"); + break; + case NodeType::OUT: + printf(" OUT"); + break; + } +} + +NodeType Node::type() const { return first; } + +int Node::regValue() const { + assert(first == NodeType::REG); + return second; +} + +int Node::varValue() const { + assert(first == NodeType::VARIABLE); + return second; +} + +void Graph::connect(const Node From, const Node To) { + AdjacencyLists[From].insert(To); +} + +void Graph::disconnect(const Node From, const Node To) { + AdjacencyLists[From].erase(To); +} + +std::vector Graph::getPathFrom(const Node From, const Node To) const { + std::vector Path; + NodeSet Seen; + dfs(From, To, Path, Seen); + return Path; +} + +// DFS is implemented recursively, this is fine as graph size is small (~250 +// nodes, ~200 edges, longuest path depth < 10). +bool Graph::dfs(const Node Current, const Node Sentinel, + std::vector &Path, NodeSet &Seen) const { + Path.push_back(Current); + Seen.insert(Current); + if (Current == Sentinel) + return true; + if (AdjacencyLists.count(Current)) { + for (const Node Next : AdjacencyLists.find(Current)->second) { + if (Seen.count(Next)) + continue; + if (dfs(Next, Sentinel, Path, Seen)) + return true; + } + } + Path.pop_back(); + return false; +} + +// For each Register Units we walk up their parents. +// Let's take the case of the A register family: +// +// RAX +// ^ +// EAX +// ^ +// AX +// ^ ^ +// AH AL +// +// Register Units are AH and AL. +// Walking them up gives the following lists: +// AH->AX->EAX->RAX and AL->AX->EAX->RAX +// When walking the lists we add connect current to parent both ways leading to +// the following connections: +// +// AL<->AX, AH<->AX, AX<->EAX, EAX<->RAX +// We repeat this process for all Unit Registers to cover all connections. +void setupRegisterAliasing(const llvm::MCRegisterInfo &RegInfo, + Graph &TheGraph) { + using SuperItr = llvm::MCSuperRegIterator; + for (size_t Reg = 0; Reg < RegInfo.getNumRegUnits(); ++Reg) { + size_t Current = Reg; + for (SuperItr Super(Reg, &RegInfo); Super.isValid(); ++Super) { + const Node A = Node::Reg(Current); + const Node B = Node::Reg(*Super); + TheGraph.connect(A, B); + TheGraph.connect(B, A); + Current = *Super; + } + } +} + +} // namespace graph +} // namespace exegesis Index: tools/llvm-exegesis/lib/PerfHelper.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/PerfHelper.h @@ -0,0 +1,103 @@ +//===-- PerfHelper.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Helpers for measuring perf events. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include +#include + +struct perf_event_attr; + +namespace exegesis { +namespace pfm { + +// Returns true on error. +bool PfmInitialize(); +void PfmTerminate(); + +// Retrieves the encoding for the event described by pfm_event_string. +// NOTE: pfm_initialize() must be called before creating PerfEvent objects. +class PerfEvent { +public: + // http://perfmon2.sourceforge.net/manv4/libpfm.html + // Events are expressed as strings. e.g. "INSTRUCTION_RETIRED" + explicit PerfEvent(llvm::StringRef pfm_event_string); + + PerfEvent(const PerfEvent &) = delete; + PerfEvent(PerfEvent &&other); + ~PerfEvent(); + + // The pfm_event_string passed at construction time. + const char *name() const; + + // Whether the event was successfully created. + bool valid() const; + + // The encoded event to be passed to the Kernel. + const perf_event_attr *attribute() const; + + // The fully qualified name for the event. + // e.g. "snb_ep::INSTRUCTION_RETIRED:e=0:i=0:c=0:t=0:u=1:k=0:mg=0:mh=1" + const std::string &getPfmEventString() const; + +private: + const std::string EventString; + std::string FullQualifiedEventString; + perf_event_attr *Attr; +}; + +// Uses a valid PerfEvent to configure the Kernel so we can measure the +// underlying event. +struct Counter { + // event: the PerfEvent to measure. + explicit Counter(const PerfEvent &event); + + Counter(const Counter &) = delete; + Counter(Counter &&other) = default; + + ~Counter(); + + void start(); // Starts the measurement of the event. + void stop(); // Stops the measurement of the event. + int64_t read() const; // Return the current value of the counter. + +private: + int FileDescriptor = -1; +}; + +// Helper to measure a list of PerfEvent for a particular function. +// callback is called for each successful measure (PerfEvent needs to be valid). +template +void Measure( + const llvm::ArrayRef Events, + const std::function &Callback, + Function Fn) { + for (const auto &Event : Events) { + if (!Event.valid()) + continue; + Counter Cnt(Event); + Cnt.start(); + Fn(); + Cnt.stop(); + Callback(Event, Cnt.read()); + } +} + +} // namespace pfm +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H Index: tools/llvm-exegesis/lib/PerfHelper.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/PerfHelper.cpp @@ -0,0 +1,131 @@ +//===-- PerfHelper.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PerfHelper.h" +#include "llvm/Config/config.h" +#include "llvm/Support/raw_ostream.h" +#ifdef HAVE_LIBPFM +#include "perfmon/perf_event.h" +#include "perfmon/pfmlib.h" +#include "perfmon/pfmlib_perf_event.h" +#endif + +namespace exegesis { +namespace pfm { + +#ifdef HAVE_LIBPFM +static bool IsPfmError(int Code) { return Code != PFM_SUCCESS; } +#endif + +bool PfmInitialize() { +#ifdef HAVE_LIBPFM + return IsPfmError(pfm_initialize()); +#else + return true; +#endif +} + +void PfmTerminate() { +#ifdef HAVE_LIBPFM + pfm_terminate(); +#endif +} + +PerfEvent::~PerfEvent() { +#ifdef HAVE_LIBPFM + delete Attr; + ; +#endif +} + +PerfEvent::PerfEvent(PerfEvent &&other) + : EventString(std::move(other.EventString)), + FullQualifiedEventString(std::move(other.FullQualifiedEventString)), + Attr(other.Attr) { + other.Attr = nullptr; +} + +PerfEvent::PerfEvent(llvm::StringRef PfmEventString) + : EventString(PfmEventString.str()), Attr(nullptr) { +#ifdef HAVE_LIBPFM + char *fstr = nullptr; + pfm_perf_encode_arg_t arg = {}; + Attr = new perf_event_attr(); + arg.attr = Attr; + arg.fstr = &fstr; + arg.size = sizeof(pfm_perf_encode_arg_t); + const int result = pfm_get_os_event_encoding(EventString.c_str(), PFM_PLM3, + PFM_OS_PERF_EVENT, &arg); + if (IsPfmError(result)) { + // We don't know beforehand which counters are available (e.g. 6 uops ports + // on Sandybridge but 8 on Haswell) so we report the missing counter without + // crashing. + llvm::errs() << pfm_strerror(result) << " - cannot create event " + << EventString; + } + if (fstr) { + FullQualifiedEventString = fstr; + free(fstr); + } +#endif +} + +const char *PerfEvent::name() const { return EventString.c_str(); } + +bool PerfEvent::valid() const { + return !FullQualifiedEventString.empty(); +} + +const perf_event_attr *PerfEvent::attribute() const { return Attr; } + +const std::string &PerfEvent::getPfmEventString() const { + return FullQualifiedEventString; +} + +#ifdef HAVE_LIBPFM +Counter::Counter(const PerfEvent &Event) { + const pid_t Pid = 0; // measure current process/thread. + const int Cpu = -1; // measure any processor. + const int GroupFd = -1; // no grouping of counters. + const uint32_t Flags = 0; + perf_event_attr AttrCopy = *Event.attribute(); + FileDescriptor = perf_event_open(&AttrCopy, Pid, Cpu, GroupFd, Flags); + assert(FileDescriptor != -1 && + "Unable to open event, make sure your kernel allows user space perf " + "monitoring."); +} + +Counter::~Counter() { close(FileDescriptor); } + +void Counter::start() { ioctl(FileDescriptor, PERF_EVENT_IOC_RESET, 0); } + +void Counter::stop() { ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); } + +int64_t Counter::read() const { + int64_t Count = 0; + ::read(FileDescriptor, &Count, sizeof(Count)); + return Count; +} + +#else + +Counter::Counter(const PerfEvent &Event) : FileDescriptor(-1) {} + +Counter::~Counter() = default; + +void Counter::start() {} + +void Counter::stop() {} + +int64_t Counter::read() const { return 42; } + +#endif + +} // namespace pfm +} // namespace exegesis Index: tools/llvm-exegesis/lib/Uops.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/Uops.h @@ -0,0 +1,41 @@ +//===-- Uops.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure uop decomposition. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H +#define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H + +#include "BenchmarkRunner.h" + +namespace exegesis { + +class UopsBenchmarkRunner : public BenchmarkRunner { +public: + ~UopsBenchmarkRunner() override; + +private: + const char *getDisplayName() const override; + + llvm::Expected> + createCode(const LLVMState &State, unsigned OpcodeIndex, + unsigned NumRepetitions, + const JitFunctionContext &Context) const override; + + std::vector + runMeasurements(const LLVMState &State, const JitFunction &Function, + unsigned NumRepetitions) const override; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/Uops.cpp @@ -0,0 +1,249 @@ +//===-- Uops.cpp ------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkResult.h" +#include "InstructionSnippetGenerator.h" +#include "PerfHelper.h" +#include "Uops.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include + +namespace exegesis { + +// TODO(courbet): Handle memory. +static bool isInvalidOperand(const llvm::MCOperandInfo &OpInfo) { + switch (OpInfo.OperandType) { + default: + return true; + case llvm::MCOI::OPERAND_IMMEDIATE: + case llvm::MCOI::OPERAND_REGISTER: + return false; + } +} + +static llvm::Error makeError(llvm::Twine Msg) { + return llvm::make_error(Msg, + llvm::inconvertibleErrorCode()); +} + +// TODO(courbet): Remove this. Counter names should be optional properties of +// ProcResUnits. +static const std::string *getEventNameFromProcResName(const char *ProcResName) { + static const std::unordered_map Entries = { + {"SBPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"SBPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"SBPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"SBPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"HWPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"HWPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"HWPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, + {"HWPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, + {"HWPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"HWPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"HWPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, + {"HWPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, + {"SKLPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"SKLPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"SKLPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, + {"SKLPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, + {"SKLPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"SKLPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"SKLPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, + {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, + {"SKXPort0", "UOPS_DISPATCHED_PORT:PORT_0"}, + {"SKXPort1", "UOPS_DISPATCHED_PORT:PORT_1"}, + {"SKXPort2", "UOPS_DISPATCHED_PORT:PORT_2"}, + {"SKXPort3", "UOPS_DISPATCHED_PORT:PORT_3"}, + {"SKXPort4", "UOPS_DISPATCHED_PORT:PORT_4"}, + {"SKXPort5", "UOPS_DISPATCHED_PORT:PORT_5"}, + {"SKXPort6", "UOPS_DISPATCHED_PORT:PORT_6"}, + {"SKXPort7", "UOPS_DISPATCHED_PORT:PORT_7"}, + }; + const auto It = Entries.find(ProcResName); + return It == Entries.end() ? nullptr : &It->second; +} + +static std::vector generateIndependentAssignments( + const LLVMState &State, const llvm::MCInstrDesc &InstrDesc, + llvm::SmallVector Vars, int MaxAssignments) { + std::unordered_set IsUsedByAnyVar; + for (const Variable &Var : Vars) { + if (Var.IsUse) { + IsUsedByAnyVar.insert(Var.PossibleRegisters.begin(), + Var.PossibleRegisters.end()); + } + } + + std::vector pattern; + for (int A = 0; A < MaxAssignments; ++A) { + // TODO(courbet): This is a bit pessimistic. We should get away with an + // assignment that ensures that the set of assigned registers for uses and + // the set of assigned registers for defs do not intersect (registers + // for uses (resp defs) do not have to be all distinct). + const std::vector Regs = getExclusiveAssignment(Vars); + if (Regs.empty()) + break; + // Remove all assigned registers defs that are used by at least one other + // variable from the list of possible variable registers. This ensures that + // we never create a RAW hazard that would lead to serialization. + for (size_t i = 0; i < Vars.size(); ++i) { + llvm::MCPhysReg Reg = Regs[i]; + if (Vars[i].IsDef && IsUsedByAnyVar.count(Reg)) { + Vars[i].PossibleRegisters.remove(Reg); + } + } + // Create an MCInst and check assembly. + llvm::MCInst Inst = generateMCInst(InstrDesc, Vars, Regs); + if (!State.canAssemble(Inst)) + continue; + pattern.push_back(std::move(Inst)); + } + return pattern; +} + +UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; + +const char *UopsBenchmarkRunner::getDisplayName() const { return "uops"; } + +llvm::Expected> UopsBenchmarkRunner::createCode( + const LLVMState &State, const unsigned OpcodeIndex, + const unsigned NumRepetitions, const JitFunctionContext &Context) const { + const auto &InstrInfo = State.getInstrInfo(); + const auto &RegInfo = State.getRegInfo(); + const llvm::MCInstrDesc &InstrDesc = InstrInfo.get(OpcodeIndex); + for (const llvm::MCOperandInfo &OpInfo : InstrDesc.operands()) { + if (isInvalidOperand(OpInfo)) + return makeError("Only registers and immediates are supported"); + } + + // TODO(courbet): Load constants into registers (e.g. with fld1) to not break + // instructions like x87. + + // TODO(courbet): Move that to an external doc. + // Ideally we would like the only limitation on executing uops to be the issue + // ports. Maximizing port pressure increases the likelihood that the load is + // distributed evenly across possible ports. + + // To achieve that, one approach is to generate instructions that do not have + // data dependencies between them. + // + // For some instructions, this is trivial: + // mov rax, qword ptr [rsi] + // mov rax, qword ptr [rsi] + // mov rax, qword ptr [rsi] + // mov rax, qword ptr [rsi] + // For the above snippet, haswell just renames rax four times and executes the + // four instructions two at a time on P23 and P0126. + // + // For some instructions, we just need to make sure that the source is + // different from the destination. For example, IDIV8r reads from GPR and + // writes to AX. We just need to ensure that the variable is assigned a + // register which is different from AX: + // idiv bx + // idiv bx + // idiv bx + // idiv bx + // The above snippet will be able to fully saturate the ports, while the same + // with ax would issue one uop every `latency(IDIV8r)` cycles. + // + // Some instructions make this harder because they both read and write from + // the same register: + // inc rax + // inc rax + // inc rax + // inc rax + // This has a data dependency from each instruction to the next, limit the + // number of instructions that can be issued in parallel. + // It turns out that this is not a big issue on recent Intel CPUs because they + // have heuristics to balance port pressure. In the snippet above, subsequent + // instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs + // might end up executing them all on P0 (just because they can), or try + // avoiding P5 because it's usually under high pressure from vector + // instructions. + // This issue is even more important for high-latency instructions because + // they increase the idle time of the CPU, e.g. : + // imul rax, rbx + // imul rax, rbx + // imul rax, rbx + // imul rax, rbx + // + // To avoid that, we do the renaming statically by generating as many + // independent exclusive assignments as possible (until all possible registers + // are exhausted) e.g.: + // imul rax, rbx + // imul rcx, rbx + // imul rdx, rbx + // imul r8, rbx + // + // Some instruction even make the above static renaming impossible because + // they implicitly read and write from the same operand, e.g. ADC16rr reads + // and writes from EFLAGS. + // In that case we just use a greedy register assignment and hope for the + // best. + + const auto Vars = getVariables(RegInfo, InstrDesc, Context.getReservedRegs()); + + // Generate as many independent exclusive assignments as possible. + constexpr const int kMaxStaticRenames = 20; + std::vector Pattern = + generateIndependentAssignments(State, InstrDesc, Vars, kMaxStaticRenames); + if (Pattern.empty()) { + // We don't even have a single exclusive assignment, fallback to a greedy + // assignment. + // TODO(courbet): Tell the user about this decision. + const std::vector Regs = getGreedyAssignment(Vars); + if (Regs.empty()) + return makeError("No feasible greedy assignment"); + llvm::MCInst Inst = generateMCInst(InstrDesc, Vars, Regs); + if (!State.canAssemble(Inst)) + return makeError("Cannot assemble greedy assignment"); + Pattern.push_back(std::move(Inst)); + } + + // Generate repetitions of the pattern until benchmark_iterations is reached. + std::vector Result; + Result.reserve(NumRepetitions); + for (unsigned i = 0; i < NumRepetitions; ++i) + Result.push_back(Pattern[i % Pattern.size()]); + return Result; +} + +std::vector +UopsBenchmarkRunner::runMeasurements(const LLVMState &State, + const JitFunction &Function, + const unsigned NumRepetitions) const { + const auto &SchedModel = State.getSubtargetInfo().getSchedModel(); + + std::vector Result; + for (unsigned ProcResIdx = 1; + ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) { + const llvm::MCProcResourceDesc &ProcRes = + *SchedModel.getProcResource(ProcResIdx); + const std::string *const EventName = + getEventNameFromProcResName(ProcRes.Name); + if (!EventName) + continue; + pfm::Counter Counter{pfm::PerfEvent(*EventName)}; + Counter.start(); + Function(); + Counter.stop(); + Result.push_back({llvm::itostr(ProcResIdx), + static_cast(Counter.read()) / NumRepetitions, + ProcRes.Name}); + } + return Result; +} + +} // namespace exegesis Index: tools/llvm-exegesis/lib/X86.h =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/X86.h @@ -0,0 +1,32 @@ +//===-- X86.h ---------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// X86 target-specific setup. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_X86_H +#define LLVM_TOOLS_LLVM_EXEGESIS_X86_H + +#include "BenchmarkRunner.h" +#include "LlvmState.h" + +namespace exegesis { + +class X86Filter : public BenchmarkRunner::InstructionFilter { +public: + ~X86Filter() override; + + llvm::Error shouldRun(const LLVMState &State, unsigned Opcode) const override; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_X86_H Index: tools/llvm-exegesis/lib/X86.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/lib/X86.cpp @@ -0,0 +1,38 @@ +//===-- X86.cpp --------------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" + +namespace exegesis { + +static llvm::Error makeError(llvm::Twine Msg) { + return llvm::make_error(Msg, + llvm::inconvertibleErrorCode()); +} + +X86Filter::~X86Filter() = default; + +// Test whether we can generate a snippet for this instruction. +llvm::Error X86Filter::shouldRun(const LLVMState &State, + const unsigned Opcode) const { + const auto &InstrInfo = State.getInstrInfo(); + const llvm::MCInstrDesc &InstrDesc = InstrInfo.get(Opcode); + if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) + return makeError("Unsupported opcode: isBranch/isIndirectBranch"); + if (InstrDesc.isCall() || InstrDesc.isReturn()) + return makeError("Unsupported opcode: isCall/isReturn"); + const auto OpcodeName = InstrInfo.getName(Opcode); + if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") || + OpcodeName.startswith("ADJCALLSTACK")) { + return makeError("Unsupported opcode: Push/Pop/AdjCallStack"); + } + return llvm::ErrorSuccess(); +} + +} // namespace exegesis Index: tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- /dev/null +++ tools/llvm-exegesis/llvm-exegesis.cpp @@ -0,0 +1,115 @@ +//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Measures execution properties (latencies/uops) of an instruction. +/// +//===----------------------------------------------------------------------===// + +#include "lib/BenchmarkResult.h" +#include "lib/BenchmarkRunner.h" +#include "lib/Latency.h" +#include "lib/LlvmState.h" +#include "lib/PerfHelper.h" +#include "lib/Uops.h" +#include "lib/X86.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include +#include +#include +#include + +static llvm::cl::opt + OpcodeIndex("opcode-index", llvm::cl::desc("opcode to measure, by index"), + llvm::cl::init(0)); + +static llvm::cl::opt + OpcodeName("opcode-name", llvm::cl::desc("opcode to measure, by name"), + llvm::cl::init("")); + +enum class BenchmarkModeE { Latency, Uops }; +static llvm::cl::opt + BenchmarkMode("benchmark-mode", llvm::cl::desc("the benchmark mode to run"), + llvm::cl::values(clEnumValN(BenchmarkModeE::Latency, + "latency", "Instruction Latency"), + clEnumValN(BenchmarkModeE::Uops, "uops", + "Uop Decomposition"))); + +static llvm::cl::opt + NumRepetitions("num-repetitions", + llvm::cl::desc("number of time to repeat the asm snippet"), + llvm::cl::init(10000)); + +namespace exegesis { + +void main() { + if (OpcodeName.empty() == (OpcodeIndex == 0)) { + llvm::report_fatal_error( + "please provide one and only one of 'opcode-index' or 'opcode-name' "); + } + + LLVMInitializeX86Target(); + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86AsmPrinter(); + + // FIXME: Target-specific filter. + X86Filter Filter; + + const LLVMState State; + + unsigned Opcode = OpcodeIndex; + if (Opcode == 0) { + // Resolve opcode name -> opcode. + for (unsigned i = 0; i < State.getInstrInfo().getNumOpcodes(); ++i) { + if (State.getInstrInfo().getName(i) == OpcodeName) { + Opcode = i; + break; + } + } + if (Opcode == 0) { + llvm::report_fatal_error( + llvm::Twine("unknown opcode ").concat(OpcodeName)); + } + } + + std::unique_ptr Runner; + switch (BenchmarkMode) { + case BenchmarkModeE::Latency: + Runner = llvm::make_unique(); + break; + case BenchmarkModeE::Uops: + Runner = llvm::make_unique(); + break; + } + + Runner->run(State, Opcode, NumRepetitions > 0 ? NumRepetitions : 1, Filter) + .writeYamlOrDie("-"); +} + +} // namespace exegesis + +int main(int argc, char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv, ""); + + if (exegesis::pfm::PfmInitialize()) { + llvm::errs() << "cannot initialize libpfm\n"; + return EXIT_FAILURE; + } + + exegesis::main(); + + exegesis::pfm::PfmTerminate(); + return EXIT_SUCCESS; +} Index: unittests/tools/CMakeLists.txt =================================================================== --- unittests/tools/CMakeLists.txt +++ unittests/tools/CMakeLists.txt @@ -1,4 +1,10 @@ if(LLVM_TARGETS_TO_BUILD MATCHES "X86") - add_subdirectory(llvm-cfi-verify) + add_subdirectory( + llvm-cfi-verify + ) endif() +add_subdirectory( + llvm-exegesis +) + Index: unittests/tools/llvm-exegesis/BenchmarkResultTest.cpp =================================================================== --- /dev/null +++ unittests/tools/llvm-exegesis/BenchmarkResultTest.cpp @@ -0,0 +1,54 @@ +//===-- BenchmarkResultTest.cpp ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkResult.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace exegesis { + +bool operator==(const BenchmarkMeasure &A, const BenchmarkMeasure &B) { + return std::tie(A.Key, A.Value) == std::tie(B.Key, B.Value); +} + +namespace { + +TEST(BenchmarkResultTest, WriteToAndReadFromDisk) { + InstructionBenchmark ToDisk; + + ToDisk.AsmTmpl.Name = "name"; + ToDisk.CpuName = "cpu_name"; + ToDisk.LLVMTriple = "llvm_triple"; + ToDisk.NumRepetitions = 1; + ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, "debug a"}); + ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2}); + ToDisk.Error = "error"; + + const std::string Filename = "data.yaml"; + + ToDisk.writeYamlOrDie(Filename); + + { + const auto FromDisk = InstructionBenchmark::readYamlOrDie(Filename); + + EXPECT_EQ(FromDisk.AsmTmpl.Name, ToDisk.AsmTmpl.Name); + EXPECT_EQ(FromDisk.CpuName, ToDisk.CpuName); + EXPECT_EQ(FromDisk.LLVMTriple, ToDisk.LLVMTriple); + EXPECT_EQ(FromDisk.NumRepetitions, ToDisk.NumRepetitions); + EXPECT_THAT(FromDisk.Measurements, ToDisk.Measurements); + EXPECT_THAT(FromDisk.Error, ToDisk.Error); + } +} + +} // namespace +} // namespace exegesis Index: unittests/tools/llvm-exegesis/CMakeLists.txt =================================================================== --- /dev/null +++ unittests/tools/llvm-exegesis/CMakeLists.txt @@ -0,0 +1,30 @@ +include_directories( + ${CMAKE_SOURCE_DIR}/lib/Target/X86 + ${CMAKE_BINARY_DIR}/lib/Target/X86 + ${CMAKE_SOURCE_DIR}/tools/llvm-exegesis/lib + ) + +set(LLVM_LINK_COMPONENTS + X86Desc + X86Info + X86 + MC + MCParser + Object + Support + Symbolize + ) + +add_llvm_unittest(LLVMExegesisTests + BenchmarkResultTest.cpp + InMemoryAssemblerTest.cpp + InstructionSnippetGeneratorTest.cpp + OperandGraphTest.cpp + PerfHelperTest.cpp + ) +target_link_libraries(LLVMExegesisTests PRIVATE LLVMExegesis) + +if(HAVE_LIBPFM) + target_link_libraries(LLVMExegesisTests PRIVATE pfm) +endif() + Index: unittests/tools/llvm-exegesis/InMemoryAssemblerTest.cpp =================================================================== --- /dev/null +++ unittests/tools/llvm-exegesis/InMemoryAssemblerTest.cpp @@ -0,0 +1,104 @@ +//===-- InMemoryAssemblerTest.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InMemoryAssembler.h" +#include "X86InstrInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +namespace exegesis { +namespace { + +using llvm::MCInst; +using llvm::MCInstBuilder; +using llvm::X86::EAX; +using llvm::X86::MOV32ri; +using llvm::X86::MOV64ri; +using llvm::X86::MOV64ri32; +using llvm::X86::RAX; +using llvm::X86::RETQ; +using llvm::X86::XOR32rr; +using testing::ElementsAre; +using testing::Gt; +using testing::SizeIs; + +class MachineFunctionGeneratorTest : public ::testing::Test { +protected: + MachineFunctionGeneratorTest() + : TT(llvm::sys::getProcessTriple()), + CpuName(llvm::sys::getHostCPUName().str()) {} + + static void SetUpTestCase() { + LLVMInitializeX86Target(); + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86AsmPrinter(); + } + + std::unique_ptr createTargetMachine() { + std::string Error; + const llvm::Target *TheTarget = + llvm::TargetRegistry::lookupTarget(TT, Error); + assert(TheTarget); + const llvm::TargetOptions Options; + return std::unique_ptr( + static_cast(TheTarget->createTargetMachine( + TT, CpuName, "", Options, llvm::Reloc::Model::Static))); + } + +private: + const std::string TT; + const std::string CpuName; +}; + +TEST_F(MachineFunctionGeneratorTest, JitFunction) { + JitFunctionContext Context(createTargetMachine()); + JitFunction Function(std::move(Context), {}); + ASSERT_THAT(Function.getFunctionBytes().str(), ElementsAre(0xc3)); + Function(); +} + +TEST_F(MachineFunctionGeneratorTest, JitFunctionXOR32rr) { + JitFunctionContext Context(createTargetMachine()); + JitFunction Function( + std::move(Context), + {MCInstBuilder(XOR32rr).addReg(EAX).addReg(EAX).addReg(EAX)}); + ASSERT_THAT(Function.getFunctionBytes().str(), ElementsAre(0x31, 0xc0, 0xc3)); + Function(); +} + +TEST_F(MachineFunctionGeneratorTest, JitFunctionMOV64ri) { + JitFunctionContext Context(createTargetMachine()); + JitFunction Function(std::move(Context), + {MCInstBuilder(MOV64ri32).addReg(RAX).addImm(42)}); + ASSERT_THAT(Function.getFunctionBytes().str(), + ElementsAre(0x48, 0xc7, 0xc0, 0x2a, 0x00, 0x00, 0x00, 0xc3)); + Function(); +} + +TEST_F(MachineFunctionGeneratorTest, JitFunctionMOV32ri) { + JitFunctionContext Context(createTargetMachine()); + JitFunction Function(std::move(Context), + {MCInstBuilder(MOV32ri).addReg(EAX).addImm(42)}); + ASSERT_THAT(Function.getFunctionBytes().str(), + ElementsAre(0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3)); + Function(); +} + +} // namespace +} // namespace exegesis Index: unittests/tools/llvm-exegesis/InstructionSnippetGeneratorTest.cpp =================================================================== --- /dev/null +++ unittests/tools/llvm-exegesis/InstructionSnippetGeneratorTest.cpp @@ -0,0 +1,310 @@ +//===-- InstructionSnippetGeneratorTest.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InstructionSnippetGenerator.h" +#include "X86InstrInfo.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include + +namespace llvm { + +bool operator==(const MCOperand &a, const MCOperand &b) { + if ((a.isValid() == false) && (b.isValid() == false)) + return true; + if (a.isReg() && b.isReg()) + return a.getReg() == b.getReg(); + if (a.isImm() && b.isImm()) + return a.getImm() == b.getImm(); + return false; +} + +} // namespace llvm + +namespace exegesis { +namespace { + +using testing::_; +using testing::AllOf; +using testing::AnyOf; +using testing::Contains; +using testing::ElementsAre; +using testing::Eq; +using testing::Field; +using testing::IsEmpty; +using testing::Not; +using testing::SizeIs; +using testing::UnorderedElementsAre; +using testing::Value; + +using llvm::X86::AL; +using llvm::X86::AX; +using llvm::X86::EFLAGS; +using llvm::X86::RAX; + +class MCInstrDescViewTest : public ::testing::Test { +protected: + MCInstrDescViewTest() + : TheTriple(llvm::sys::getProcessTriple()), + CpuName(llvm::sys::getHostCPUName().str()) {} + + void SetUp() override { + LLVMInitializeX86Target(); + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86TargetMC(); + + std::string Error; + const auto *Target = llvm::TargetRegistry::lookupTarget(TheTriple, Error); + InstrInfo.reset(Target->createMCInstrInfo()); + RegInfo.reset(Target->createMCRegInfo(TheTriple)); + } + + const std::string TheTriple; + const std::string CpuName; + std::unique_ptr InstrInfo; + std::unique_ptr RegInfo; +}; + +MATCHER(IsDef, "") { return arg.IsDef; } +MATCHER(IsUse, "") { return arg.IsUse; } +MATCHER_P2(EqVarAssignement, VariableIndexMatcher, AssignedRegisterMatcher, + "") { + return Value( + arg, + AllOf(Field(&VariableAssignment::VarIdx, VariableIndexMatcher), + Field(&VariableAssignment::AssignedReg, AssignedRegisterMatcher))); +} + +size_t ReturnIndexZero(const size_t upper_bound) { return 0; } + +TEST_F(MCInstrDescViewTest, XOR64rr) { + const llvm::MCInstrDesc &InstrDesc = InstrInfo->get(llvm::X86::XOR64rr); + const auto Vars = + getVariables(*RegInfo, InstrDesc, llvm::BitVector(RegInfo->getNumRegs())); + + // XOR64rr has the following operands: + // 0. out register + // 1. in register (tied to out) + // 2. in register + // 3. out EFLAGS (implicit) + // + // This translates to 3 variables, one for 0 and 1, one for 2, one for 3. + ASSERT_THAT(Vars, SizeIs(3)); + + EXPECT_THAT(Vars[0].ExplicitOperands, ElementsAre(0, 1)); + EXPECT_THAT(Vars[1].ExplicitOperands, ElementsAre(2)); + EXPECT_THAT(Vars[2].ExplicitOperands, ElementsAre()); // implicit + + EXPECT_THAT(Vars[0], AllOf(IsUse(), IsDef())); + EXPECT_THAT(Vars[1], AllOf(IsUse(), Not(IsDef()))); + EXPECT_THAT(Vars[2], AllOf(Not(IsUse()), IsDef())); + + EXPECT_THAT(Vars[0].PossibleRegisters, Contains(RAX)); + EXPECT_THAT(Vars[1].PossibleRegisters, Contains(RAX)); + EXPECT_THAT(Vars[2].PossibleRegisters, ElementsAre(EFLAGS)); + + // Computing chains. + const auto Chains = computeSequentialAssignmentChains(*RegInfo, Vars); + + // Because operands 0 and 1 are tied together any possible value for variable + // 0 would do. + for (const auto ® : Vars[0].PossibleRegisters) { + EXPECT_THAT(Chains, Contains(ElementsAre(EqVarAssignement(0, reg)))); + } + + // We also have chains going through operand 0 to 2 (i.e. Vars 0 and 1). + EXPECT_THAT(Vars[0].PossibleRegisters, Eq(Vars[1].PossibleRegisters)) + << "Variables 0 and 1 are of the same class"; + for (const auto ® : Vars[0].PossibleRegisters) { + EXPECT_THAT(Chains, + Contains(UnorderedElementsAre(EqVarAssignement(0, reg), + EqVarAssignement(1, reg)))); + } + + // EFLAGS does not appear as an input therefore no chain can contain EFLAGS. + EXPECT_THAT(Chains, Not(Contains(Contains(EqVarAssignement(_, EFLAGS))))); + + // Computing assignment. + const auto Regs = getRandomAssignment(Vars, Chains, &ReturnIndexZero); + EXPECT_THAT(Regs, ElementsAre(RAX, RAX, EFLAGS)); + + // Generating assembler representation. + const llvm::MCInst Inst = generateMCInst(InstrDesc, Vars, Regs); + EXPECT_THAT(Inst.getOpcode(), llvm::X86::XOR64rr); + EXPECT_THAT(Inst.getNumOperands(), 3); + EXPECT_THAT(Inst.getOperand(0), llvm::MCOperand::createReg(RAX)); + EXPECT_THAT(Inst.getOperand(1), llvm::MCOperand::createReg(RAX)); + EXPECT_THAT(Inst.getOperand(2), llvm::MCOperand::createReg(RAX)); +} + +TEST_F(MCInstrDescViewTest, AAA) { + const llvm::MCInstrDesc &InstrDesc = InstrInfo->get(llvm::X86::AAA); + const auto Vars = + getVariables(*RegInfo, InstrDesc, llvm::BitVector(RegInfo->getNumRegs())); + + // AAA has the following operands: + // 0. out AX (implicit) + // 1. out EFLAGS (implicit) + // 2. in AL (implicit) + // 3. in EFLAGS (implicit) + // + // This translates to 4 Vars (non are tied together). + ASSERT_THAT(Vars, SizeIs(4)); + + EXPECT_THAT(Vars[0].ExplicitOperands, ElementsAre()); // implicit + EXPECT_THAT(Vars[1].ExplicitOperands, ElementsAre()); // implicit + EXPECT_THAT(Vars[2].ExplicitOperands, ElementsAre()); // implicit + EXPECT_THAT(Vars[3].ExplicitOperands, ElementsAre()); // implicit + + EXPECT_THAT(Vars[0], AllOf(Not(IsUse()), IsDef())); + EXPECT_THAT(Vars[1], AllOf(Not(IsUse()), IsDef())); + EXPECT_THAT(Vars[2], AllOf(IsUse(), Not(IsDef()))); + EXPECT_THAT(Vars[3], AllOf(IsUse(), Not(IsDef()))); + + EXPECT_THAT(Vars[0].PossibleRegisters, ElementsAre(AX)); + EXPECT_THAT(Vars[1].PossibleRegisters, ElementsAre(EFLAGS)); + EXPECT_THAT(Vars[2].PossibleRegisters, ElementsAre(AL)); + EXPECT_THAT(Vars[3].PossibleRegisters, ElementsAre(EFLAGS)); + + const auto chains = computeSequentialAssignmentChains(*RegInfo, Vars); + EXPECT_THAT(chains, + ElementsAre(UnorderedElementsAre(EqVarAssignement(0, AX), + EqVarAssignement(2, AL)), + UnorderedElementsAre(EqVarAssignement(1, EFLAGS), + EqVarAssignement(3, EFLAGS)))); + + // Computing assignment. + const auto Regs = getRandomAssignment(Vars, chains, &ReturnIndexZero); + EXPECT_THAT(Regs, ElementsAre(AX, EFLAGS, AL, EFLAGS)); + + // Generating assembler representation. + const llvm::MCInst Inst = generateMCInst(InstrDesc, Vars, Regs); + EXPECT_THAT(Inst.getOpcode(), llvm::X86::AAA); + EXPECT_THAT(Inst.getNumOperands(), 0) << "All operands are implicit"; +} + +TEST_F(MCInstrDescViewTest, ReservedRegisters) { + llvm::BitVector ReservedRegisters(RegInfo->getNumRegs()); + + const llvm::MCInstrDesc &InstrDesc = InstrInfo->get(llvm::X86::XOR64rr); + { + const auto Vars = getVariables(*RegInfo, InstrDesc, ReservedRegisters); + ASSERT_THAT(Vars, SizeIs(3)); + EXPECT_THAT(Vars[0].PossibleRegisters, Contains(RAX)); + EXPECT_THAT(Vars[1].PossibleRegisters, Contains(RAX)); + } + + // Disable RAX. + ReservedRegisters.set(RAX); + { + const auto Vars = getVariables(*RegInfo, InstrDesc, ReservedRegisters); + ASSERT_THAT(Vars, SizeIs(3)); + EXPECT_THAT(Vars[0].PossibleRegisters, Not(Contains(RAX))); + EXPECT_THAT(Vars[1].PossibleRegisters, Not(Contains(RAX))); + } +} + +Variable MakeVariableWithRegisters(bool IsReg, + std::initializer_list Regs) { + assert((IsReg || (Regs.size() == 0)) && "IsReg => !(Regs.size() == 0)"); + Variable var; + var.IsReg = IsReg; + var.PossibleRegisters.insert(Regs.begin(), Regs.end()); + return var; +} + +TEST(getExclusiveAssignment, TriviallyFeasible) { + const std::vector Vars = { + MakeVariableWithRegisters(true, {3}), + MakeVariableWithRegisters(false, {}), + MakeVariableWithRegisters(true, {4}), + MakeVariableWithRegisters(true, {5}), + }; + const auto Regs = getExclusiveAssignment(Vars); + EXPECT_THAT(Regs, ElementsAre(3, 0, 4, 5)); +} + +TEST(getExclusiveAssignment, TriviallyInfeasible1) { + const std::vector Vars = { + MakeVariableWithRegisters(true, {3}), + MakeVariableWithRegisters(true, {}), + MakeVariableWithRegisters(true, {4}), + MakeVariableWithRegisters(true, {5}), + }; + const auto Regs = getExclusiveAssignment(Vars); + EXPECT_THAT(Regs, ElementsAre()); +} + +TEST(getExclusiveAssignment, TriviallyInfeasible) { + const std::vector Vars = { + MakeVariableWithRegisters(true, {4}), + MakeVariableWithRegisters(true, {4}), + }; + const auto Regs = getExclusiveAssignment(Vars); + EXPECT_THAT(Regs, ElementsAre()); +} + +TEST(getExclusiveAssignment, Feasible1) { + const std::vector Vars = { + MakeVariableWithRegisters(true, {4, 3}), + MakeVariableWithRegisters(true, {6, 3}), + MakeVariableWithRegisters(true, {6, 4}), + }; + const auto Regs = getExclusiveAssignment(Vars); + ASSERT_THAT(Regs, AnyOf(ElementsAre(3, 6, 4), ElementsAre(4, 3, 6))); +} + +TEST(getExclusiveAssignment, Feasible2) { + const std::vector Vars = { + MakeVariableWithRegisters(true, {1, 2}), + MakeVariableWithRegisters(true, {3, 4}), + }; + const auto Regs = getExclusiveAssignment(Vars); + ASSERT_THAT(Regs, AnyOf(ElementsAre(1, 3), ElementsAre(1, 4), + ElementsAre(2, 3), ElementsAre(2, 4))); +} + +TEST(getGreedyAssignment, Infeasible) { + const std::vector Vars = { + MakeVariableWithRegisters(true, {}), + MakeVariableWithRegisters(true, {1, 2}), + }; + const auto Regs = getGreedyAssignment(Vars); + ASSERT_THAT(Regs, ElementsAre()); +} + +TEST(getGreedyAssignment, FeasibleNoFallback) { + const std::vector Vars = { + MakeVariableWithRegisters(true, {1, 2}), + MakeVariableWithRegisters(false, {}), + MakeVariableWithRegisters(true, {2, 3}), + }; + const auto Regs = getGreedyAssignment(Vars); + ASSERT_THAT(Regs, ElementsAre(1, 0, 2)); +} + +TEST(getGreedyAssignment, Feasible) { + const std::vector Vars = { + MakeVariableWithRegisters(false, {}), + MakeVariableWithRegisters(true, {1, 2}), + MakeVariableWithRegisters(true, {2, 3}), + MakeVariableWithRegisters(true, {2, 3}), + MakeVariableWithRegisters(true, {2, 3}), + }; + const auto Regs = getGreedyAssignment(Vars); + ASSERT_THAT(Regs, ElementsAre(0, 1, 2, 3, 2)); +} + +} // namespace +} // namespace exegesis Index: unittests/tools/llvm-exegesis/OperandGraphTest.cpp =================================================================== --- /dev/null +++ unittests/tools/llvm-exegesis/OperandGraphTest.cpp @@ -0,0 +1,48 @@ +//===-- OperandGraphTest.cpp ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "OperandGraph.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::ElementsAre; +using testing::IsEmpty; +using testing::Not; + +namespace exegesis { +namespace graph { +namespace { + +static const auto kIn = Node::In(); +static const auto kOut = Node::Out(); + +TEST(OperandGraphTest, NoPath) { + Graph TheGraph; + EXPECT_THAT(TheGraph.getPathFrom(kIn, kOut), IsEmpty()); +} + +TEST(OperandGraphTest, Connecting) { + Graph TheGraph; + TheGraph.connect(kIn, kOut); + EXPECT_THAT(TheGraph.getPathFrom(kIn, kOut), Not(IsEmpty())); + EXPECT_THAT(TheGraph.getPathFrom(kIn, kOut), ElementsAre(kIn, kOut)); +} + +TEST(OperandGraphTest, ConnectingThroughVariable) { + const Node Var = Node::Var(1); + Graph TheGraph; + TheGraph.connect(kIn, Var); + TheGraph.connect(Var, kOut); + EXPECT_THAT(TheGraph.getPathFrom(kIn, kOut), Not(IsEmpty())); + EXPECT_THAT(TheGraph.getPathFrom(kIn, kOut), ElementsAre(kIn, Var, kOut)); +} + +} // namespace +} // namespace graph +} // namespace exegesis Index: unittests/tools/llvm-exegesis/PerfHelperTest.cpp =================================================================== --- /dev/null +++ unittests/tools/llvm-exegesis/PerfHelperTest.cpp @@ -0,0 +1,49 @@ +//===-- PerfHelperTest.cpp --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PerfHelper.h" +#include "llvm/Config/config.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace exegesis { +namespace pfm { +namespace { + +using ::testing::Gt; +using ::testing::IsEmpty; +using ::testing::Not; + +TEST(PerfHelperTest, FunctionalTest) { +#ifdef HAVE_LIBPFM + ASSERT_FALSE(PfmInitialize()); + const PerfEvent SingleEvent("CYCLES:u"); + const auto &EmptyFn = []() {}; + std::string CallbackEventName; + std::string CallbackEventNameFullyQualifed; + int64_t CallbackEventCycles; + Measure(llvm::makeArrayRef(SingleEvent), + [&](const PerfEvent &Event, int64_t Value) { + CallbackEventName = Event.name(); + CallbackEventNameFullyQualifed = Event.getPfmEventString(); + CallbackEventCycles = Value; + }, + EmptyFn); + EXPECT_EQ(CallbackEventName, "CYCLES:u"); + EXPECT_THAT(CallbackEventNameFullyQualifed, Not(IsEmpty())); + PfmTerminate(); +#else + ASSERT_TRUE(PfmInitialize()); +#endif + +} + +} // namespace +} // namespace pfm +} // namespace exegesis