diff --git a/llvm/docs/CommandGuide/llvm-profgen.rst b/llvm/docs/CommandGuide/llvm-profgen.rst
new file mode 100644
--- /dev/null
+++ b/llvm/docs/CommandGuide/llvm-profgen.rst
@@ -0,0 +1,42 @@
+llvm-profgen - LLVM SPGO profile generation tool
+================================================
+
+.. program:: llvm-profgen
+
+SYNOPSIS
+--------
+
+:program:`llvm-profgen` [*commands*] [*options*]
+
+DESCRIPTION
+-----------
+
+The :program:`llvm-profgen` utility generates a profile data file
+from given perf script data files for sample-based profile guided
+optimization (SPGO).
+
+COMMANDS
+--------
+All of the following commands are required:
+
+.. option:: --perfscript=<file[,file,...]>
+
+  Path of perf-script trace created by Linux perf tool with `script`
+  command (the raw perf.data should be profiled with -b).
+
+.. option:: --output=<file>
+
+  Path of the output profile file.
+
+OPTIONS
+-------
+:program:`llvm-profgen` supports the following options:
+
+.. option:: --binary=<file[,file,...]>
+
+  Path of the input profiled binary files. If no file path is specified, the
+  path of the actual profiled binaries will be used instead.
+
+.. option:: --show-mmap-events
+
+  Print mmap events.
diff --git a/llvm/test/tools/llvm-profgen/lit.local.cfg b/llvm/test/tools/llvm-profgen/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/lit.local.cfg @@ -0,0 +1,6 @@ +import subprocess +import lit.util + +config.suffixes = ['.test', '.ll', '.s', '.yaml'] +if not lit.util.which("llvm-profgen", config.llvm_tools_dir): + config.unsupported = True diff --git a/llvm/test/tools/llvm-profgen/mmapEvent.test b/llvm/test/tools/llvm-profgen/mmapEvent.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/mmapEvent.test @@ -0,0 +1,30 @@ +; RUN: llvm-profgen --perfscript=%s --output=%t --show-mmap-events | FileCheck %s + +PERF_RECORD_MMAP2 2580483/2580483: [0x400000(0x1000) @ 0 103:01 539973862 1972407324]: r-xp /home/a.out +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505b40000(0x224000) @ 0 08:04 19532214 4169021329]: r-xp /usr/lib64/ld-2.17.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7ffe88097000(0x1000) @ 0 00:00 0 0]: r-xp [vdso] +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505d56000(0xa000) @ 0 08:04 19530021 4190740662]: r-xp /usr/lib64/perf_fopen_hook.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7f250593c000(0x204000) @ 0 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7f250556e000(0x3ce000) @ 0 08:04 19532221 4003737677]: r-xp /usr/lib64/libc-2.17.so +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505358000(0x216000) @ 0 08:04 19534595 2609212015]: r-xp /usr/lib64/libz.so.1.2.7 + 7f2505b49811 + 0x7f2505b49811/0x7f2505b509f0/P/-/-/0 0x7f2505b4974c/0x7f2505b4975b/P/-/-/0 0x7f2505b49837/0x7f2505b49720/P/-/-/0 0x7f2505b50a5a/0x7f2505b49816/P/-/-/0 0x7f2505b50a27/0x7f2505b50a50/P/-/-/0 0x7f2505b50a36/0x7f2505b50a20/P/-/-/0 0x7f2505b59dd0/0x7f2505b50a34/P/-/-/0 0x7f2505b59db4/0x7f2505b59dc3/P/-/-/0 0x7f2505b50a2f/0x7f2505b59db0/P/-/-/0 0x7f2505b50a15/0x7f2505b50a29/P/-/-/0 0x7f2505b59dd0/0x7f2505b50a05/P/-/-/0 0x7f2505b59db4/0x7f2505b59dc3/P/-/-/0 0x7f2505b50a00/0x7f2505b59db0/P/-/-/0 
0x7f2505b49811/0x7f2505b509f0/P/-/-/0 0x7f2505b4974c/0x7f2505b4975b/P/-/-/0 0x7f2505b4a08a/0x7f2505b496a0/P/-/-/0 +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505d56000(0x8000) @ 0 08:04 19530021 4190740662]: r-xp /usr/lib64/perf_fopen_hook.so + 4006b1 + 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505156000(0x202000) @ 0 103:01 539962022 734061270]: r-xp /home/hoy/test/dlopen/helper.so + 4006b1 + 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 0x4006b1/0x4006a0/P/-/-/0 +PERF_RECORD_MMAP2 2580483/2580483: [0x7f2505156000(0x202000) @ 0 103:01 539962022 734061270]: r-xp /home/hoy/test/dlopen/helper.so + + +; CHECK: Mmap: Binary /home/a.out loaded at 0x400000 +; CHECK: Mmap: Binary /usr/lib64/ld-2.17.so loaded at 0x7f2505b40000 +; CHECK: Mmap: Binary [vdso] loaded at 0x7ffe88097000 +; CHECK: Mmap: Binary /usr/lib64/perf_fopen_hook.so loaded at 0x7f2505d56000 +; CHECK: Mmap: Binary /usr/lib64/libdl-2.17.so loaded at 0x7f250593c000 +; CHECK: Mmap: Binary /usr/lib64/libc-2.17.so loaded at 0x7f250556e000 +; CHECK: Mmap: Binary /usr/lib64/libz.so.1.2.7 loaded at 0x7f2505358000 +; CHECK: Mmap: Binary /usr/lib64/perf_fopen_hook.so loaded at 0x7f2505d56000 +; CHECK: Mmap: Binary /home/hoy/test/dlopen/helper.so loaded at 0x7f2505156000 +; 
CHECK: Mmap: Binary /home/hoy/test/dlopen/helper.so loaded at 0x7f2505156000
diff --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  ProfileData
+  Support
+  Symbolize
+  )
+
+add_llvm_tool(llvm-profgen
+  llvm-profgen.cpp
+  PerfReader.cpp
+  )
diff --git a/llvm/tools/llvm-profgen/ErrorHandling.h b/llvm/tools/llvm-profgen/ErrorHandling.h
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/ErrorHandling.h
@@ -0,0 +1,47 @@
+//===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
+#define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/WithColor.h"
+#include <cstdlib>
+#include <system_error>
+
+using namespace llvm;
+
+/// Write an error diagnostic tagged with the tool name to stderr, made of the
+/// optional \p Whence prefix, \p Message and an optional \p Hint note, then exit.
+LLVM_ATTRIBUTE_NORETURN inline void
+exitWithError(const Twine &Message, StringRef Whence = StringRef(),
+              StringRef Hint = StringRef()) {
+  WithColor::error(errs(), "llvm-profgen");
+  if (!Whence.empty())
+    errs() << Whence.str() << ": ";
+  errs() << Message << "\n";
+  if (!Hint.empty())
+    WithColor::note() << Hint.str() << "\n";
+  ::exit(EXIT_FAILURE);
+}
+
+/// Report the message of \p EC (with optional \p Whence prefix) and exit.
+LLVM_ATTRIBUTE_NORETURN inline void
+exitWithError(std::error_code EC, StringRef Whence = StringRef()) {
+  exitWithError(EC.message(), Whence);
+}
+
+/// Report \p E and exit. The error is rendered with toString() so the text of
+/// errors that do not wrap a std::error_code is kept in the diagnostic.
+LLVM_ATTRIBUTE_NORETURN inline void exitWithError(Error E, StringRef Whence) {
+  exitWithError(toString(std::move(E)), Whence);
+}
+#endif
diff --git a/llvm/tools/llvm-profgen/LLVMBuild.txt
b/llvm/tools/llvm-profgen/LLVMBuild.txt new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-profgen/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./tools/llvm-profgen/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-profgen +parent = Tools +required_libraries = Support diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -0,0 +1,102 @@ +//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
+#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
+#include "ErrorHandling.h"
+#include "ProfiledBinary.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Regex.h"
+#include <cstdint>
+#include <fstream>
+#include <map>
+#include <string>
+
+using namespace llvm;
+using namespace sampleprof;
+
+namespace llvm {
+namespace sampleprof {
+
+// Stream based trace line iterator; exits if the file cannot be opened
+class TraceStream {
+  std::string CurrentLine;
+  std::ifstream Fin;
+  bool IsAtEoF = false;
+  uint64_t LineNumber = 0;
+
+public:
+  TraceStream(StringRef Filename) : Fin(Filename.str()) {
+    if (!Fin.good())
+      exitWithError("Error read input perf script file", Filename);
+    advance();
+  }
+
+  StringRef getCurrentLine() {
+    assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
+    return CurrentLine;
+  }
+
+  uint64_t getLineNumber() { return LineNumber; }
+
+  bool isAtEoF() { return IsAtEoF; }
+
+  // Read the next line; flags End-of-File once no further line can be read
+  void advance() {
+    if (!std::getline(Fin, CurrentLine)) {
+      IsAtEoF = true;
+      return;
+    }
+    LineNumber++;
+  }
+};
+
+// Filename to binary map
+using BinaryMap = StringMap<ProfiledBinary>;
+// Address to binary map for fast look-up
+using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>;
+
+// Load binaries and read perf trace to parse the events and samples
+class PerfReader {
+
+  BinaryMap BinaryTable;
+  AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.
+
+  // The parsed MMap event
+  struct MMapEvent {
+    pid_t PID = 0;
+    uint64_t BaseAddress = 0;
+    uint64_t Size = 0;
+    uint64_t Offset = 0;
+    StringRef BinaryPath; // Refers into the trace line being parsed.
+  };
+
+  /// Construct a ProfiledBinary for \p BinaryPath and register it in the
+  /// binary table under its file name. If the name is already registered and
+  /// \p AllowNameConflict is false, report the conflict and exit.
+  ProfiledBinary &loadBinary(const StringRef BinaryPath,
+                             bool AllowNameConflict = true);
+  void updateBinaryAddress(const MMapEvent &Event);
+
+public:
+  PerfReader(cl::list<std::string> &BinaryFilenames);
+
+  /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
+  /// mapping between the binary name and its memory layout.
+  /// Exits with an error if the line does not match the expected format.
+  void parseMMap2Event(TraceStream &TraceIt);
+  void parseEvent(TraceStream &TraceIt);
+  // Parse perf events and samples
+  void parseTrace(StringRef Filename);
+  void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
+};
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -0,0 +1,131 @@
+//===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "PerfReader.h"
+
+static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
+                                    cl::init(false), cl::ZeroOrMore,
+                                    cl::desc("Print binary load events."));
+
+namespace llvm {
+namespace sampleprof {
+
+PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames) {
+  // Load the binaries.
+  for (const auto &Filename : BinaryFilenames)
+    loadBinary(Filename, /*AllowNameConflict*/ false);
+}
+
+ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath,
+                                       bool AllowNameConflict) {
+  // The binary table is currently indexed by the binary name not the full
+  // binary path. This is because the user-given path may not match the one
+  // that was actually executed.
+  StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
+
+  // The ProfiledBinary ctor (which loads) only runs if the name is new.
+  auto Ret = BinaryTable.try_emplace(BinaryName, BinaryPath);
+
+  if (!Ret.second && !AllowNameConflict) {
+    std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() +
+                           " and " + Ret.first->second.getPath().str() + " \n";
+    exitWithError(ErrorMsg);
+  }
+
+  return Ret.first->second;
+}
+
+void PerfReader::updateBinaryAddress(const MMapEvent &Event) {
+  // Look up the binary by the file name of the mapped path.
+  StringRef BinaryPath = Event.BinaryPath;
+  StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
+
+  auto I = BinaryTable.find(BinaryName);
+  // Drop the event which doesn't belong to user-provided binaries
+  // or if its image is loaded at the same address
+  if (I == BinaryTable.end() || Event.BaseAddress == I->second.getBaseAddress())
+    return;
+
+  ProfiledBinary &Binary = I->second;
+
+  // A binary image could be unloaded and then reloaded at different
+  // place, so update the address map here
+  AddrToBinaryMap.erase(Binary.getBaseAddress());
+  AddrToBinaryMap[Event.BaseAddress] = &Binary;
+
+  // Update binary load address.
+  Binary.setBaseAddress(Event.BaseAddress);
+}
+
+void PerfReader::parseMMap2Event(TraceStream &TraceIt) {
+  // Parse a line like:
+  //  PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
+  //  08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
+  constexpr static const char *const Pattern =
+      "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
+      "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
+      "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
+  // Field 0 - whole line
+  // Field 1 - PID
+  // Field 2 - base address
+  // Field 3 - mmapped size
+  // Field 4 - page offset
+  // Field 5 - binary path
+  enum EventIndex {
+    WHOLE_LINE = 0,
+    PID = 1,
+    BASE_ADDRESS = 2,
+    MMAPPED_SIZE = 3,
+    PAGE_OFFSET = 4,
+    BINARY_PATH = 5
+  };
+
+  Regex RegMmap2(Pattern);
+  SmallVector<StringRef, 6> Fields;
+  bool R = RegMmap2.match(TraceIt.getCurrentLine(), &Fields);
+  if (!R) {
+    std::string ErrorMsg = "Cannot parse mmap event: Line" +
+                           Twine(TraceIt.getLineNumber()).str() + ": " +
+                           TraceIt.getCurrentLine().str() + " \n";
+    exitWithError(ErrorMsg);
+  }
+  MMapEvent Event;
+  Fields[PID].getAsInteger(10, Event.PID);
+  Fields[BASE_ADDRESS].getAsInteger(0, Event.BaseAddress);
+  Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size);
+  Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset);
+  Event.BinaryPath = Fields[BINARY_PATH];
+  updateBinaryAddress(Event);
+  if (ShowMmapEvents) {
+    outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at "
+           << format("0x%" PRIx64 ":", Event.BaseAddress) << " \n";
+  }
+}
+
+void PerfReader::parseEvent(TraceStream &TraceIt) {
+  if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2"))
+    parseMMap2Event(TraceIt);
+
+  TraceIt.advance();
+}
+
+void PerfReader::parseTrace(StringRef Filename) {
+  // Trace line iterator
+  TraceStream TraceIt(Filename);
+  while (!TraceIt.isAtEoF()) {
+    parseEvent(TraceIt);
+  }
+}
+
+void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) {
+  // Parse perf traces.
+  for (const auto &Filename : PerfTraceFilenames)
+    parseTrace(Filename);
+}
+
+} // namespace sampleprof
+} // namespace llvm
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -0,0 +1,38 @@
+//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
+#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace sampleprof {
+
+class ProfiledBinary {
+  std::string Path;
+  uint64_t BaseAddress = 0;
+
+public:
+  ProfiledBinary(StringRef Path) : Path(Path) { load(); }
+
+  StringRef getPath() const { return Path; }
+  StringRef getName() const { return llvm::sys::path::filename(Path); }
+  uint64_t getBaseAddress() const { return BaseAddress; }
+  void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
+
+private:
+  void load() {
+    // TODO:
+  }
+};
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -0,0 +1,47 @@
+//===- llvm-profgen.cpp - LLVM SPGO profile generation tool ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-profgen generates SPGO profiles from perf script output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ErrorHandling.h"
+#include "PerfReader.h"
+#include "ProfiledBinary.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
+
+static cl::list<std::string> PerfTraceFilenames(
+    "perfscript", cl::value_desc("perfscript"), cl::OneOrMore,
+    llvm::cl::MiscFlags::CommaSeparated,
+    cl::desc("Path of perf-script trace created by Linux perf tool with "
+             "`script` command(the raw perf.data should be profiled with -b)"));
+
+static cl::list<std::string>
+    BinaryFilenames("binary", cl::value_desc("binary"), cl::ZeroOrMore,
+                    llvm::cl::MiscFlags::CommaSeparated,
+                    cl::desc("Path of profiled binary files"));
+
+static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+                                           cl::Required,
+                                           cl::desc("Output profile file"));
+
+using namespace llvm;
+using namespace sampleprof;
+
+int main(int argc, const char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
+
+  // Load binaries and parse perf events and samples (no output written yet)
+  PerfReader Reader(BinaryFilenames);
+  Reader.parsePerfTraces(PerfTraceFilenames);
+
+  return EXIT_SUCCESS;
+}