diff --git a/llvm/docs/CommandGuide/llvm-profgen.rst b/llvm/docs/CommandGuide/llvm-profgen.rst
new file mode 100644
--- /dev/null
+++ b/llvm/docs/CommandGuide/llvm-profgen.rst
@@ -0,0 +1,42 @@
+llvm-profgen - LLVM SPGO profile generation tool
+================================================
+
+.. program:: llvm-profgen
+
+SYNOPSIS
+--------
+
+:program:`llvm-profgen` [*commands*] [*options*]
+
+DESCRIPTION
+-----------
+
+The :program:`llvm-profgen` utility generates a profile data file
+from given perf script data files for sampling-based profile-guided
+optimization (SPGO).
+
+COMMANDS
+--------
+The following commands are required:
+
+.. option:: --perfscript=<perfscript>
+
+  Path of perf-script trace created by Linux perf tool with ``script``
+  command (the raw perf.data should be profiled with ``-b``).
+
+.. option:: --output=<output>
+
+  Path of the output profile file.
+
+OPTIONS
+-------
+:program:`llvm-profgen` supports the following options:
+
+.. option:: --binary=<binary>
+
+  Path of the input profiled binary files. If no file path is specified, the
+  path of the actual profiled binaries will be used instead.
+
+.. option:: --show-mmap
+
+  Print mmap events.
diff --git a/llvm/test/tools/llvm-profgen/lit.local.cfg b/llvm/test/tools/llvm-profgen/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/lit.local.cfg @@ -0,0 +1,6 @@ +import subprocess +import lit.util + +config.suffixes = ['.test', '.ll', '.s', '.yaml'] +if not lit.util.which("llvm-profgen", config.llvm_tools_dir): + config.unsupported = True diff --git a/llvm/test/tools/llvm-profgen/mmapEvent.test b/llvm/test/tools/llvm-profgen/mmapEvent.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/mmapEvent.test @@ -0,0 +1,27 @@ +; RUN: llvm-profgen --perfscript=%s --output=%t --show-mmap | FileCheck %s + +PERF_RECORD_MMAP2 2580483/2580483: [0x400000(0x1000) @ 0 103:01 539973862 1972407324]: r-xp /home/a.out +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505b40000(0x224000) @ 0 08:04 19532214 4169021329]: r-xp /usr/lib64/ld-2.17.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7ffe88097000(0x1000) @ 0 00:00 0 0]: r-xp [vdso] +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505d56000(0xa000) @ 0 08:04 19530021 4190740662]: r-xp /usr/lib64/perf_fopen_hook.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7f250593c000(0x204000) @ 0 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7f250556e000(0x3ce000) @ 0 08:04 19532221 4003737677]: r-xp /usr/lib64/libc-2.17.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505358000(0x216000) @ 0 08:04 19534595 2609212015]: r-xp /usr/lib64/libz.so.1.2.7 + 7f2505b49811 0x7f2505b49811/0x7f2505b509f0/P/-/-/0 0x7f2505b4974c/0x7f2505b4975b/P/-/-/0 0x7f2505b49837/0x7f2505b49720/P/-/-/0 0x7f2505b50a5a/0x7f2505b49816/P/-/-/0 0x7f2505b50a27/0x7f2505b50a50/P/-/-/0 0x7f2505b50a36/0x7f2505b50a20/P/-/-/0 0x7f2505b59dd0/0x7f2505b50a34/P/-/-/0 0x7f2505b59db4/0x7f2505b59dc3/P/-/-/0 0x7f2505b50a2f/0x7f2505b59db0/P/-/-/0 0x7f2505b50a15/0x7f2505b50a29/P/-/-/0 0x7f2505b59dd0/0x7f2505b50a05/P/-/-/0 0x7f2505b59db4/0x7f2505b59dc3/P/-/-/0 0x7f2505b50a00/0x7f2505b59db0/P/-/-/0 
0x7f2505b49811/0x7f2505b509f0/P/-/-/0 0x7f2505b4974c/0x7f2505b4975b/P/-/-/0 0x7f2505b4a08a/0x7f2505b496a0/P/-/-/0 +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505d56000(0x8000) @ 0 08:04 19530021 4190740662]: r-xp /usr/lib64/perf_fopen_hook.so + 4006b1 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505156000(0x202000) @ 0 103:01 539962022 734061270]: r-xp /home/hoy/test/dlopen/helper.so + 4006b1 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505156000(0x202000) @ 0 103:01 539962022 734061270]: r-xp /home/hoy/test/dlopen/helper.so + + +; CHECK: Mmap: Binary /home/a.out loaded at 0x400000 +; CHECK: Mmap: Binary /usr/lib64/ld-2.17.so loaded at 0x7f2505b40000 +; CHECK: Mmap: Binary [vdso] loaded at 0x7ffe88097000 +; CHECK: Mmap: Binary /usr/lib64/perf_fopen_hook.so loaded at 0x7f2505d56000 +; CHECK: Mmap: Binary /usr/lib64/libdl-2.17.so loaded at 0x7f250593c000 +; CHECK: Mmap: Binary /usr/lib64/libc-2.17.so loaded at 0x7f250556e000 +; CHECK: Mmap: Binary /usr/lib64/libz.so.1.2.7 loaded at 0x7f2505358000 +; CHECK: Mmap: Binary /usr/lib64/perf_fopen_hook.so loaded at 0x7f2505d56000 +; CHECK: Mmap: Binary /home/hoy/test/dlopen/helper.so loaded at 0x7f2505156000 +; 
CHECK: Mmap: Binary /home/hoy/test/dlopen/helper.so loaded at 0x7f2505156000
diff --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  ProfileData
+  Support
+  Symbolize
+  )
+
+add_llvm_tool(llvm-profgen
+  llvm-profgen.cpp
+  )
diff --git a/llvm/tools/llvm-profgen/ErrorHandling.h b/llvm/tools/llvm-profgen/ErrorHandling.h
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/ErrorHandling.h
@@ -0,0 +1,61 @@
+//===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Fatal-error helpers for llvm-profgen: each overload prints a diagnostic to
+// stderr and exits the process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
+#define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+#include <system_error>
+#include <utility>
+
+using namespace llvm;
+
+/// Report a fatal error and terminate the process with EXIT_FAILURE.
+///
+/// Prints the llvm-profgen error prefix to stderr, then "Whence: " when
+/// \p Whence is non-empty, then \p Message. A non-empty \p Hint is emitted
+/// afterwards as a note.
+LLVM_ATTRIBUTE_NORETURN inline void
+exitWithError(const Twine &Message, StringRef Whence = StringRef(),
+              StringRef Hint = StringRef()) {
+  WithColor::error(errs(), "llvm-profgen");
+  if (!Whence.empty())
+    errs() << Whence.str() << ": ";
+  errs() << Message << "\n";
+  if (!Hint.empty())
+    WithColor::note() << Hint.str() << "\n";
+  ::exit(EXIT_FAILURE);
+}
+
+/// Report the message of \p EC as a fatal error and exit.
+LLVM_ATTRIBUTE_NORETURN inline void
+exitWithError(std::error_code EC, StringRef Whence = StringRef()) {
+  exitWithError(EC.message(), Whence);
+}
+
+/// Report \p E as a fatal error and exit, consuming the error.
+///
+/// The error is rendered with toString() instead of being converted to a
+/// std::error_code first, so its own message text is what gets printed.
+LLVM_ATTRIBUTE_NORETURN inline void exitWithError(Error E, StringRef Whence) {
+  exitWithError(toString(std::move(E)), Whence);
+}
+
+#endif // LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
diff --git a/llvm/tools/llvm-profgen/LLVMBuild.txt
b/llvm/tools/llvm-profgen/LLVMBuild.txt
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./tools/llvm-profgen/LLVMBuild.txt ----------------------*- Conf -*--===;
+;
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-profgen
+parent = Tools
+required_libraries = Support
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -0,0 +1,271 @@
+//===- llvm-profgen.cpp - LLVM SPGO profile generation tool ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-profgen generates SPGO profiles from perf script output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ErrorHandling.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cinttypes>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+
+using namespace llvm;
+
+static cl::list<std::string> PerfTraceFilenames(
+    "perfscript", cl::value_desc("perfscript"), cl::OneOrMore,
+    llvm::cl::MiscFlags::CommaSeparated,
+    cl::desc("Path of perf-script trace created by Linux perf tool with "
+             "`script` command(the raw perf.data should be profiled with -b)"));
+
+static cl::list<std::string>
+    BinaryFilenames("binary", cl::value_desc("binary"), cl::ZeroOrMore,
+                    llvm::cl::MiscFlags::CommaSeparated,
+                    cl::desc("Path of profiled binary files"));
+
+static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+                                           cl::Required,
+                                           cl::desc("Output profile file"));
+
+static cl::opt<bool> ShowMmap("show-mmap", cl::Hidden, cl::init(false),
+                              cl::ZeroOrMore,
+                              cl::desc("Print binary load events."));
+
+namespace llvm {
+namespace sampleprof {
+
+/// A profiled binary together with the address it was last seen loaded at.
+class ProfiledBinary {
+  // Path handed to the constructor (empty for a default-constructed object).
+  std::string Path;
+  // Load address recorded through setBaseAddress(); 0 until it is set.
+  uint64_t BaseAddress = 0;
+
+public:
+  ProfiledBinary() { load(); }
+  // Call load() directly: writing `ProfiledBinary();` in this body would only
+  // construct and discard a temporary, leaving this object unloaded.
+  ProfiledBinary(StringRef Path) : Path(Path.str()) { load(); }
+
+  StringRef getPath() const { return Path; }
+  StringRef getName() const { return llvm::sys::path::filename(Path); }
+  uint64_t getBaseAddress() const { return BaseAddress; }
+  void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
+
+private:
+  /// Placeholder for reading the binary; not implemented yet.
+  void load() {
+    // TODO:
+  }
+};
+
+using BinaryMap = StringMap<ProfiledBinary>;
+using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>;
+
+/// Fields extracted from one PERF_RECORD_MMAP2 line.
+struct MMapEvent {
+  // A fixed-width integer instead of pid_t, which needs a POSIX header that
+  // is not available on every host.
+  uint64_t PID = 0;
+  uint64_t BaseAddress = 0;
+  uint64_t Size = 0;
+  uint64_t Offset = 0;
+  StringRef BinaryPath;
+};
+
+/// Reads perf-script traces and tracks the load address of each registered
+/// binary.
+class PerfReader {
+
+  // Registered binaries, keyed by file name (not full path).
+  BinaryMap BinaryTable;
+  AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.
+
+  /// Prepare a memory buffer for the contents of \p Filename.
+  ///
+  /// Exits with an error if the file cannot be read or exceeds the size limit
+  /// checked below.
+  static std::unique_ptr<MemoryBuffer> setupMemoryBuffer(StringRef Filename) {
+    auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
+    if (std::error_code EC = BufferOrErr.getError())
+      exitWithError(EC, Filename);
+
+    auto Buffer = std::move(BufferOrErr.get());
+    if (Buffer->getBufferSize() >
+        static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))
+      exitWithError("file too large", Filename);
+
+    return Buffer;
+  }
+
+  /// Register \p BinaryPath in the binary table and return its entry.
+  ///
+  /// If an entry with the same file name already exists it is returned
+  /// unchanged, unless \p AllowNameConflict is false, in which case the tool
+  /// exits with an error.
+  ProfiledBinary &loadBinary(const StringRef BinaryPath,
+                             bool AllowNameConflict = true) {
+    // The binary table is currently indexed by the binary name not the full
+    // binary path. This is because the user-given path may not match the one
+    // that was actually executed.
+    StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
+
+    // try_emplace constructs the ProfiledBinary (whose ctor performs the
+    // load) only when no entry with this name exists yet.
+    auto Ret = BinaryTable.try_emplace(BinaryName, BinaryPath);
+
+    if (!Ret.second && !AllowNameConflict) {
+      std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() +
+                             " and " + Ret.first->second.getPath().str() +
+                             " \n";
+      exitWithError(ErrorMsg);
+    }
+
+    return Ret.first->second;
+  }
+
+  /// Record the load address carried by \p Event for the matching binary.
+  void updateBinaryAddress(const MMapEvent &Event) {
+    StringRef BinaryPath = Event.BinaryPath;
+    StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
+
+    auto I = BinaryTable.find(BinaryName);
+    // Drop the event which doesn't belong to user-provided binaries
+    // or if its image is loaded at the same address
+    if (I == BinaryTable.end() ||
+        Event.BaseAddress == I->second.getBaseAddress())
+      return;
+
+    ProfiledBinary &Binary = I->second;
+
+    // A binary image could be unloaded and then reloaded at different
+    // place, so update the address map here
+    AddrToBinaryMap.erase(Binary.getBaseAddress());
+    AddrToBinaryMap[Event.BaseAddress] = &Binary;
+
+    // Update binary load address.
+    Binary.setBaseAddress(Event.BaseAddress);
+  }
+
+public:
+  PerfReader() {}
+
+  /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
+  /// mapping between the binary name and its memory layout.
+  ///
+  /// Exits with an error if the line does not match the expected layout or
+  /// one of its numeric fields cannot be converted.
+  void parseMMap2Event(const line_iterator Line) {
+    // Parse a line like:
+    //  PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
+    //  08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
+    constexpr static const char *const Pattern =
+        "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
+        "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
+        "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
+    // Field 0 - whole line
+    // Field 1 - PID
+    // Field 2 - base address
+    // Field 3 - mmapped size
+    // Field 4 - page offset
+    // Field 5 - binary path
+    enum EventIndex {
+      WHOLE_LINE = 0,
+      PID = 1,
+      BASE_ADDRESS = 2,
+      MMAPPED_SIZE = 3,
+      PAGE_OFFSET = 4,
+      BINARY_PATH = 5
+    };
+
+    Regex RegMmap2(Pattern);
+    SmallVector<StringRef, 6> Fields;
+    MMapEvent Event;
+    // getAsInteger() returns true on failure, so any conversion error is
+    // reported like a line that did not match at all.
+    if (!RegMmap2.match(*Line, &Fields) ||
+        Fields[PID].getAsInteger(10, Event.PID) ||
+        Fields[BASE_ADDRESS].getAsInteger(0, Event.BaseAddress) ||
+        Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size) ||
+        Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset)) {
+      std::string ErrorMsg = "Cannot parse mmap event: Line " +
+                             Twine(Line.line_number()).str() + ": " +
+                             Line->str() + " \n";
+      exitWithError(ErrorMsg);
+    }
+
+    Event.BinaryPath = Fields[BINARY_PATH];
+    updateBinaryAddress(Event);
+    if (ShowMmap) {
+      outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at "
+             << format("0x%" PRIx64 ":", Event.BaseAddress) << " \n";
+    }
+  }
+
+  /// Handle the line at \p Index and advance to the next one. Only
+  /// PERF_RECORD_MMAP2 lines are processed; every other line is skipped.
+  void parseEvent(line_iterator &Index) {
+    if (Index->startswith("PERF_RECORD_MMAP2"))
+      parseMMap2Event(Index);
+    ++Index;
+  }
+
+  /// Parse every line of the perf-script trace stored in \p Filename.
+  void parseTrace(StringRef Filename) {
+    auto Buffer = setupMemoryBuffer(Filename);
+    line_iterator LineIt(*Buffer, /*SkipBlanks=*/false);
+    while (!LineIt.is_at_eof()) {
+      parseEvent(LineIt);
+    }
+  }
+
+  /// Register the binaries given on the command line, then parse every trace.
+  void run() {
+    // Load the binaries.
+    for (const auto &Filename : BinaryFilenames)
+      loadBinary(Filename, /*AllowNameConflict*/ false);
+
+    // Parse perf traces.
+    for (const auto &Filename : PerfTraceFilenames)
+      parseTrace(Filename);
+  }
+};
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+using namespace sampleprof;
+
+int main(int argc, const char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
+
+  PerfReader Reader;
+  Reader.run();
+
+  return EXIT_SUCCESS;
+}