Index: llvm/trunk/CMakeLists.txt =================================================================== --- llvm/trunk/CMakeLists.txt +++ llvm/trunk/CMakeLists.txt @@ -435,6 +435,16 @@ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) endif( LLVM_USE_OPROFILE ) +option(LLVM_USE_PERF + "Use perf JIT interface to inform perf about JIT code" OFF) + +# If enabled, verify we are on a platform that supports perf. +if( LLVM_USE_PERF ) + if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) + message(FATAL_ERROR "perf support is available on Linux only.") + endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) +endif( LLVM_USE_PERF ) + set(LLVM_USE_SANITIZER "" CACHE STRING "Define the sanitizer used to build binaries and tests.") option(LLVM_OPTIMIZE_SANITIZED_BUILDS "Pass -O1 on debug sanitizer builds" ON) @@ -648,6 +658,9 @@ if (LLVM_USE_OPROFILE) set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT) endif (LLVM_USE_OPROFILE) +if (LLVM_USE_PERF) + set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents) +endif (LLVM_USE_PERF) message(STATUS "Constructing LLVMBuild project information") execute_process( Index: llvm/trunk/docs/CMake.rst =================================================================== --- llvm/trunk/docs/CMake.rst +++ llvm/trunk/docs/CMake.rst @@ -375,6 +375,9 @@ tools. Defaults to ON. + **LLVM_USE_PERF**:BOOL + Enable building support for Perf (linux profiling tool) JIT support. Defaults to OFF. + **LLVM_ENABLE_ZLIB**:BOOL Enable building with zlib to support compression/uncompression in LLVM tools. Defaults to ON. Index: llvm/trunk/include/llvm-c/ExecutionEngine.h =================================================================== --- llvm/trunk/include/llvm-c/ExecutionEngine.h +++ llvm/trunk/include/llvm-c/ExecutionEngine.h @@ -187,6 +187,7 @@ LLVMJITEventListenerRef LLVMCreateGDBRegistrationListener(void); LLVMJITEventListenerRef LLVMCreateIntelJITEventListener(void); LLVMJITEventListenerRef LLVMCreateOprofileJITEventListener(void); +LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void); /** * @} Index: llvm/trunk/include/llvm/Config/llvm-config.h.cmake =================================================================== --- llvm/trunk/include/llvm/Config/llvm-config.h.cmake +++ llvm/trunk/include/llvm/Config/llvm-config.h.cmake @@ -62,6 +62,9 @@ /* Define if we have the oprofile JIT-support library */ #cmakedefine01 LLVM_USE_OPROFILE +/* Define if we have the perf JIT-support library */ +#cmakedefine01 LLVM_USE_PERF + /* Major version of the LLVM API */ #define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR} Index: llvm/trunk/include/llvm/ExecutionEngine/JITEventListener.h =================================================================== --- llvm/trunk/include/llvm/ExecutionEngine/JITEventListener.h +++ llvm/trunk/include/llvm/ExecutionEngine/JITEventListener.h @@ -117,6 +117,15 @@ } #endif // USE_OPROFILE +#if LLVM_USE_PERF + static JITEventListener *createPerfJITEventListener(); +#else + static JITEventListener *createPerfJITEventListener() + { + return nullptr; + } +#endif // USE_PERF + private: virtual void anchor(); }; @@ -133,4 +142,8 @@ LLVMJITEventListenerRef LLVMCreateOProfileJITEventListener(void); #endif +#ifndef LLVM_USE_PERF +LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void); +#endif + #endif // LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H Index: llvm/trunk/lib/ExecutionEngine/CMakeLists.txt =================================================================== --- llvm/trunk/lib/ExecutionEngine/CMakeLists.txt +++ llvm/trunk/lib/ExecutionEngine/CMakeLists.txt @@ -30,3 +30,7 @@ if( LLVM_USE_INTEL_JITEVENTS ) add_subdirectory(IntelJITEvents) endif( LLVM_USE_INTEL_JITEVENTS ) + +if( LLVM_USE_PERF ) + add_subdirectory(PerfJITEvents) +endif( LLVM_USE_PERF ) Index: llvm/trunk/lib/ExecutionEngine/LLVMBuild.txt =================================================================== --- llvm/trunk/lib/ExecutionEngine/LLVMBuild.txt +++ llvm/trunk/lib/ExecutionEngine/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc +subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents [component_0] type = Library Index: llvm/trunk/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt =================================================================== --- llvm/trunk/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt +++ llvm/trunk/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMPerfJITEvents + PerfJITEventListener.cpp + ) + +add_dependencies(LLVMPerfJITEvents LLVMCodeGen) Index: llvm/trunk/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt =================================================================== --- llvm/trunk/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt +++ llvm/trunk/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = OptionalLibrary +name = PerfJITEvents +parent = ExecutionEngine +required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support Index: llvm/trunk/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp =================================================================== --- llvm/trunk/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +++ llvm/trunk/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp @@ -0,0 +1,497 @@ +//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a JITEventListener object that tells perf about JITted +// functions, including source line information. +// +// Documentation for perf jit integration is available at: +// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt +// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errno.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/raw_ostream.h" + +#include // mmap() +#include // getpid() +#include // clock_gettime(), time(), localtime_r() */ +#include // for getpid(), read(), close() + +using namespace llvm; +using namespace llvm::object; +typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; + +namespace { + +// language identifier (XXX: should we generate something better from debug +// info?) +#define JIT_LANG "llvm-IR" +#define LLVM_PERF_JIT_MAGIC \ + ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ + (uint32_t)'D') +#define LLVM_PERF_JIT_VERSION 1 + +// bit 0: set if the jitdump file is using an architecture-specific timestamp +// clock source +#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) + +struct LLVMPerfJitHeader; + +class PerfJITEventListener : public JITEventListener { +public: + PerfJITEventListener(); + ~PerfJITEventListener() { + if (MarkerAddr) + CloseMarker(); + } + + void NotifyObjectEmitted(const ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &L) override; + void NotifyFreeingObject(const ObjectFile &Obj) override; + +private: + bool InitDebuggingDir(); + bool OpenMarker(); + void CloseMarker(); + static bool FillMachine(LLVMPerfJitHeader &hdr); + + void NotifyCode(Expected &Symbol, uint64_t CodeAddr, + uint64_t CodeSize); + void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); + + // cache lookups + pid_t Pid; + + // base directory for output data + std::string JitPath; + + // output data stream, closed via Dumpstream + int DumpFd = -1; + + // output data stream + std::unique_ptr Dumpstream; + + // prevent concurrent dumps from messing up the output file + sys::Mutex Mutex; + + // perf mmap marker + void *MarkerAddr = NULL; + + // perf support ready + bool SuccessfullyInitialized = false; + + // identifier for functions, primarily to identify when moving them around + uint64_t CodeGeneration = 1; +}; + +// The following are POD struct definitions from the perf jit specification + +enum LLVMPerfJitRecordType { + JIT_CODE_LOAD = 0, + JIT_CODE_MOVE = 1, // not emitted, code isn't moved + JIT_CODE_DEBUG_INFO = 2, + JIT_CODE_CLOSE = 3, // not emitted, unnecessary + JIT_CODE_UNWINDING_INFO = 4, // not emitted + + JIT_CODE_MAX +}; + +struct LLVMPerfJitHeader { + uint32_t Magic; // characters "JiTD" + uint32_t Version; // header version + uint32_t TotalSize; // total size of header + uint32_t ElfMach; // elf mach target + uint32_t Pad1; // reserved + uint32_t Pid; + uint64_t Timestamp; // timestamp + uint64_t Flags; // flags +}; + +// record prefix (mandatory in each record) +struct LLVMPerfJitRecordPrefix { + uint32_t Id; // record type identifier + uint32_t TotalSize; + uint64_t Timestamp; +}; + +struct LLVMPerfJitRecordCodeLoad { + LLVMPerfJitRecordPrefix Prefix; + + uint32_t Pid; + uint32_t Tid; + uint64_t Vma; + uint64_t CodeAddr; + uint64_t CodeSize; + uint64_t CodeIndex; +}; + +struct LLVMPerfJitDebugEntry { + uint64_t Addr; + int Lineno; // source line number starting at 1 + int Discrim; // column discriminator, 0 is default + // followed by null terminated filename, \xff\0 if same as previous entry +}; + +struct LLVMPerfJitRecordDebugInfo { + LLVMPerfJitRecordPrefix Prefix; + + uint64_t CodeAddr; + uint64_t NrEntry; + // followed by NrEntry LLVMPerfJitDebugEntry records +}; + +static inline uint64_t timespec_to_ns(const struct timespec *ts) { + const uint64_t NanoSecPerSec = 1000000000; + return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; +} + +static inline uint64_t perf_get_timestamp(void) { + struct timespec ts; + int ret; + + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + if (ret) + return 0; + + return timespec_to_ns(&ts); +} + +PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { + // check if clock-source is supported + if (!perf_get_timestamp()) { + errs() << "kernel does not support CLOCK_MONOTONIC\n"; + return; + } + + if (!InitDebuggingDir()) { + errs() << "could not initialize debugging directory\n"; + return; + } + + std::string Filename; + raw_string_ostream FilenameBuf(Filename); + FilenameBuf << JitPath << "/jit-" << Pid << ".dump"; + + // Need to open ourselves, because we need to hand the FD to OpenMarker() and + // raw_fd_ostream doesn't expose the FD. + using sys::fs::openFileForWrite; + if (auto EC = + openFileForReadWrite(FilenameBuf.str(), DumpFd, + sys::fs::CD_CreateNew, sys::fs::OF_None)) { + errs() << "could not open JIT dump file " << FilenameBuf.str() << ": " + << EC.message() << "\n"; + return; + } + + Dumpstream = make_unique(DumpFd, true); + + LLVMPerfJitHeader Header = {0}; + if (!FillMachine(Header)) + return; + + // signal this process emits JIT information + if (!OpenMarker()) + return; + + // emit dumpstream header + Header.Magic = LLVM_PERF_JIT_MAGIC; + Header.Version = LLVM_PERF_JIT_VERSION; + Header.TotalSize = sizeof(Header); + Header.Pid = Pid; + Header.Timestamp = perf_get_timestamp(); + Dumpstream->write(reinterpret_cast(&Header), sizeof(Header)); + + // Everything initialized, can do profiling now. + if (!Dumpstream->has_error()) + SuccessfullyInitialized = true; +} + +void PerfJITEventListener::NotifyObjectEmitted( + const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) { + + if (!SuccessfullyInitialized) + return; + + OwningBinary DebugObjOwner = L.getObjectForDebug(Obj); + const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); + + // Get the address of the object image for use as a unique identifier + std::unique_ptr Context = DWARFContext::create(DebugObj); + + // Use symbol info to iterate over functions in the object. + for (const std::pair &P : computeSymbolSizes(DebugObj)) { + SymbolRef Sym = P.first; + std::string SourceFileName; + + Expected SymTypeOrErr = Sym.getType(); + if (!SymTypeOrErr) { + // There's not much we can with errors here + consumeError(SymTypeOrErr.takeError()); + continue; + } + SymbolRef::Type SymType = *SymTypeOrErr; + if (SymType != SymbolRef::ST_Function) + continue; + + Expected Name = Sym.getName(); + if (!Name) { + consumeError(Name.takeError()); + continue; + } + + Expected AddrOrErr = Sym.getAddress(); + if (!AddrOrErr) { + consumeError(AddrOrErr.takeError()); + continue; + } + uint64_t Addr = *AddrOrErr; + uint64_t Size = P.second; + + // According to spec debugging info has to come before loading the + // corresonding code load. + DILineInfoTable Lines = Context->getLineInfoForAddressRange( + Addr, Size, FileLineInfoKind::AbsoluteFilePath); + + NotifyDebug(Addr, Lines); + NotifyCode(Name, Addr, Size); + } + + Dumpstream->flush(); +} + +void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { + // perf currently doesn't have an interface for unloading. But munmap()ing the + // code section does, so that's ok. +} + +bool PerfJITEventListener::InitDebuggingDir() { + time_t Time; + struct tm LocalTime; + char TimeBuffer[sizeof("YYYYMMDD")]; + SmallString<64> Path; + + // search for location to dump data to + if (const char *BaseDir = getenv("JITDUMPDIR")) + Path.append(BaseDir); + else if (!sys::path::home_directory(Path)) + Path = "."; + + // create debug directory + Path += "/.debug/jit/"; + if (auto EC = sys::fs::create_directories(Path)) { + errs() << "could not create jit cache directory " << Path << ": " + << EC.message() << "\n"; + return false; + } + + // create unique directory for dump data related to this process + time(&Time); + localtime_r(&Time, &LocalTime); + strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); + Path += JIT_LANG "-jit-"; + Path += TimeBuffer; + + SmallString<128> UniqueDebugDir; + + using sys::fs::createUniqueDirectory; + if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { + errs() << "could not create unique jit cache directory " << UniqueDebugDir + << ": " << EC.message() << "\n"; + return false; + } + + JitPath = UniqueDebugDir.str(); + + return true; +} + +bool PerfJITEventListener::OpenMarker() { + // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap + // is captured either live (perf record running when we mmap) or in deferred + // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump + // file for more meta data info about the jitted code. Perf report/annotate + // detect this special filename and process the jitdump file. + // + // Mapping must be PROT_EXEC to ensure it is captured by perf record + // even when not using -d option. + MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC, + MAP_PRIVATE, DumpFd, 0); + + if (MarkerAddr == MAP_FAILED) { + errs() << "could not mmap JIT marker\n"; + return false; + } + return true; +} + +void PerfJITEventListener::CloseMarker() { + if (!MarkerAddr) + return; + + munmap(MarkerAddr, sys::Process::getPageSize()); + MarkerAddr = nullptr; +} + +bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) { + char id[16]; + struct { + uint16_t e_type; + uint16_t e_machine; + } info; + + size_t RequiredMemory = sizeof(id) + sizeof(info); + + ErrorOr> MB = + MemoryBuffer::getFileSlice("/proc/self/exe", + RequiredMemory, + 0); + + // This'll not guarantee that enough data was actually read from the + // underlying file. Instead the trailing part of the buffer would be + // zeroed. Given the ELF signature check below that seems ok though, + // it's unlikely that the file ends just after that, and the + // consequence would just be that perf wouldn't recognize the + // signature. + if (auto EC = MB.getError()) { + errs() << "could not open /proc/self/exe: " << EC.message() << "\n"; + return false; + } + + memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); + memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); + + // check ELF signature + if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { + errs() << "invalid elf signature\n"; + return false; + } + + hdr.ElfMach = info.e_machine; + + return true; +} + +void PerfJITEventListener::NotifyCode(Expected &Symbol, + uint64_t CodeAddr, uint64_t CodeSize) { + assert(SuccessfullyInitialized); + + // 0 length functions can't have samples. + if (CodeSize == 0) + return; + + LLVMPerfJitRecordCodeLoad rec; + rec.Prefix.Id = JIT_CODE_LOAD; + rec.Prefix.TotalSize = sizeof(rec) + // debug record itself + Symbol->size() + 1 + // symbol name + CodeSize; // and code + rec.Prefix.Timestamp = perf_get_timestamp(); + + rec.CodeSize = CodeSize; + rec.Vma = 0; + rec.CodeAddr = CodeAddr; + rec.Pid = Pid; + rec.Tid = get_threadid(); + + // avoid interspersing output + MutexGuard Guard(Mutex); + + rec.CodeIndex = CodeGeneration++; // under lock! + + Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); + Dumpstream->write(Symbol->data(), Symbol->size() + 1); + Dumpstream->write(reinterpret_cast(CodeAddr), CodeSize); +} + +void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, + DILineInfoTable Lines) { + assert(SuccessfullyInitialized); + + // Didn't get useful debug info. + if (Lines.empty()) + return; + + LLVMPerfJitRecordDebugInfo rec; + rec.Prefix.Id = JIT_CODE_DEBUG_INFO; + rec.Prefix.TotalSize = sizeof(rec); // will be increased further + rec.Prefix.Timestamp = perf_get_timestamp(); + rec.CodeAddr = CodeAddr; + rec.NrEntry = Lines.size(); + + // compute total size size of record (variable due to filenames) + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) { + DILineInfo &line = It->second; + rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry); + rec.Prefix.TotalSize += line.FileName.size() + 1; + } + + // The debug_entry describes the source line information. It is defined as + // follows in order: + // * uint64_t code_addr: address of function for which the debug information + // is generated + // * uint32_t line : source file line number (starting at 1) + // * uint32_t discrim : column discriminator, 0 is default + // * char name[n] : source file name in ASCII, including null termination + + // avoid interspersing output + MutexGuard Guard(Mutex); + + Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); + + for (DILineInfoTable::iterator It = Begin; It != End; ++It) { + LLVMPerfJitDebugEntry LineInfo; + DILineInfo &Line = It->second; + + LineInfo.Addr = It->first; + // The function re-created by perf is preceded by a elf + // header. Need to adjust for that, otherwise the results are + // wrong. + LineInfo.Addr += 0x40; + LineInfo.Lineno = Line.Line; + LineInfo.Discrim = Line.Discriminator; + + Dumpstream->write(reinterpret_cast(&LineInfo), + sizeof(LineInfo)); + Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1); + } +} + +// There should be only a single event listener per process, otherwise perf gets +// confused. +llvm::ManagedStatic PerfListener; + +} // end anonymous namespace + +namespace llvm { +JITEventListener *JITEventListener::createPerfJITEventListener() { + return &*PerfListener; +} + +} // namespace llvm + +LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void) +{ + return wrap(JITEventListener::createPerfJITEventListener()); +} Index: llvm/trunk/tools/lli/CMakeLists.txt =================================================================== --- llvm/trunk/tools/lli/CMakeLists.txt +++ llvm/trunk/tools/lli/CMakeLists.txt @@ -36,6 +36,15 @@ ) endif( LLVM_USE_INTEL_JITEVENTS ) +if( LLVM_USE_PERF ) + set(LLVM_LINK_COMPONENTS + ${LLVM_LINK_COMPONENTS} + DebugInfoDWARF + PerfJITEvents + Object + ) +endif( LLVM_USE_PERF ) + add_llvm_tool(lli lli.cpp Index: llvm/trunk/tools/lli/lli.cpp =================================================================== --- llvm/trunk/tools/lli/lli.cpp +++ llvm/trunk/tools/lli/lli.cpp @@ -522,6 +522,9 @@ JITEventListener::createOProfileJITEventListener()); EE->RegisterJITEventListener( JITEventListener::createIntelJITEventListener()); + if (!RemoteMCJIT) + EE->RegisterJITEventListener( + JITEventListener::createPerfJITEventListener()); if (!NoLazyCompilation && RemoteMCJIT) { WithColor::warning(errs(), argv[0])