Index: include/llvm/ProfileData/SampleProfileConverter.h =================================================================== --- /dev/null +++ include/llvm/ProfileData/SampleProfileConverter.h @@ -0,0 +1,69 @@ +//=-- SampleProfileConverter.h - Sample profile converter API -----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for converting profiles from external profilers +// into the format supported by lib/Transforms/Scalar/SampleProfile.cpp. +// +//===----------------------------------------------------------------------===// +#ifndef SAMPLEPGO_PROFILE_CONVERTER_H_ +#define SAMPLEPGO_PROFILE_CONVERTER_H_ + +#include "llvm/ADT/StringRef.h" +#include "llvm/ProfileData/SampleProfileReader.h" + +namespace samplepgo { + +enum InputProfileKind { + Unknown = -1, ///< Unknown profiler. An error will be generated. + LinuxPerf, ///< Linux Perf profiler (https://perf.wiki.kernel.org/) +}; + +class SampleProfileConverter { +public: + explicit SampleProfileConverter(StringRef Binary) + : Reader(nullptr), Binary(Binary) {} + + /// \brief Creates a SamplePGO profile out of an external sample profile. + /// + /// \param InputProfileName Name of the profile file to convert. + /// \param Profiler Input profile kind. + /// \param OutputProfileName Name of the SamplePGO file to generate. + /// + /// \returns true if it succeeds, false otherwise. + bool createProfile(const string &InputProfileName, InputProfileKind Profiler, + const string &OutputProfileName); + + /// \brief Reads samples from the input profile. + /// + /// This is called by createProfile when it needs to read the input + /// profile. + /// + /// \param InputProfileName Name of the profile file to convert. + /// \param Profiler Input profile kind. 
+ /// + /// \returns true if it could read the profile, false otherwise. + bool readSample(const string &InputProfileName, InputProfileKind Profiler); + + // Creates output profile after reading from the input profile. + bool createProfileFromSample(const string &OutputProfileName); + + /// \returns total number of samples collected. + uint64_t getTotalSamples(); + + // \returns the SampleReader pointer. + const samplepgo::SampleReader &getSampleReader() { return *Reader; } + +private: + std::unique_ptr<SampleReader> Reader; + StringRef Binary; +}; + +} // namespace samplepgo + +#endif // SAMPLEPGO_PROFILE_CONVERTER_H_ Index: include/llvm/ProfileData/SampleProfileReader.h =================================================================== --- /dev/null +++ include/llvm/ProfileData/SampleProfileReader.h @@ -0,0 +1,113 @@ +//=-- SampleProfileReader.h - Generic sample profiling reader API -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading profiling data from external +// sampling profilers. +// +//===----------------------------------------------------------------------===// +#ifndef SAMPLEPGO_SAMPLE_READER_H_ +#define SAMPLEPGO_SAMPLE_READER_H_ + +#include <map> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" + +using namespace std; +using namespace llvm; + +namespace samplepgo { + +typedef map<uint64_t, uint64_t> AddressCountMap; +typedef pair<uint64_t, uint64_t> Range; +typedef map<Range, uint64_t> RangeCountMap; +typedef pair<uint64_t, uint64_t> Branch; +typedef map<Branch, uint64_t> BranchCountMap; + +// Reads in the profile data, and represent it in AddressCount. +class SampleReader { +public: + explicit SampleReader() : MaxCount(0) {} + virtual ~SampleReader() {} + + /// \brief Read the input profile and set the maximum sample value found. 
+ /// + /// \returns true if the profile was read successfully. False, otherwise. + bool readAndSetMaxCount(); + + const AddressCountMap &getAddressCountMap() const { return AddressCount; } + + const RangeCountMap &getRangeCountMap() const { return RangeCount; } + + const BranchCountMap &getBranchCountMap() const { return BranchCount; } + + /// \returns a set of all sampled addresses in the input profile. + set<uint64_t> getSampledAddresses() const; + + /// \brief Returns the sample count for a given instruction. + /// + /// \param Addr the address of the instruction to query. + uint64_t getSampleCount(uint64_t addr) const; + + /// \brief Returns the total sampled count. + uint64_t getTotalSampleCount() const; + + /// \brief Returns the maximum sampled count. + uint64_t getMaxCount() const { return MaxCount; } + + // Clear all maps to release memory. + void clear() { + AddressCount.clear(); + RangeCount.clear(); + BranchCount.clear(); + } + +protected: + /// \brief Virtual read function to read from different types of profiles. + virtual bool read() = 0; + + uint64_t MaxCount; + AddressCountMap AddressCount; + RangeCountMap RangeCount; + BranchCountMap BranchCount; +}; + +// Base class that reads in the profile from a sample data file. +class FileSampleReader : public SampleReader { +public: + explicit FileSampleReader(StringRef ProfileFile) + : ProfileFile(ProfileFile) {} + + virtual bool append(StringRef ProfileFile) = 0; + +protected: + virtual bool read(); + + StringRef ProfileFile; +}; + +// Reads in the sample data from a 'perf -b' output file. 
+class PerfDataSampleReader : public FileSampleReader { +public: + explicit PerfDataSampleReader(StringRef ProfileFile, const string &RE) + : FileSampleReader(ProfileFile), FocusBinaryRE(RE) {} + + virtual bool append(StringRef ProfileFile) override; + +private: + const string FocusBinaryRE; +}; + +} // namespace samplepgo + +#endif // SAMPLEPGO_SAMPLE_READER_H_ Index: lib/ProfileData/CMakeLists.txt =================================================================== --- lib/ProfileData/CMakeLists.txt +++ lib/ProfileData/CMakeLists.txt @@ -6,3 +6,5 @@ CoverageMappingWriter.cpp CoverageMappingReader.cpp ) + +add_subdirectory(PerfConverter) Index: lib/ProfileData/LLVMBuild.txt =================================================================== --- lib/ProfileData/LLVMBuild.txt +++ lib/ProfileData/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = PerfConverter + [component_0] type = Library name = ProfileData Index: lib/ProfileData/Makefile =================================================================== --- lib/ProfileData/Makefile +++ lib/ProfileData/Makefile @@ -8,6 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. +PARALLEL_DIRS = PerfConverter LIBRARYNAME = LLVMProfileData BUILD_ARCHIVE := 1 Index: lib/ProfileData/PerfConverter/CMakeLists.txt =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/CMakeLists.txt @@ -0,0 +1,33 @@ +add_llvm_library(LLVMPerfConverter + # LinuxPerf to LLVM sample profile converter. + addr2line.cpp + instruction_map.cpp + llvm_profile_writer.cpp + profile.cpp + profile_creator.cpp + profile_writer.cpp + source_info.cpp + symbol_map.cpp + sample_reader.cpp + + # Linux Perf reader. 
+ # Ported from https://github.com/google/autofdo/tree/master/quipper + quipper/address_mapper.cc + quipper/perf_reader.cc + quipper/perf_parser.cc + quipper/utils.cc + + # Symbolize - ELF/DWARF reader ported from: + # https://github.com/google/autofdo/tree/master/symbolize + # + # TODO - replace with LLVM tools/llvm-readobj and tools/llvm-symbolize). + symbolize/addr2line_inlinestack.cc + symbolize/bytereader.cc + symbolize/dwarf2reader.cc + symbolize/dwarf3ranges.cc + symbolize/elf_reader.cc + symbolize/functioninfo.cc + ) + +# For Quipper's use of zero-sized arrays. +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pedantic") Index: lib/ProfileData/PerfConverter/LLVMBuild.txt =================================================================== --- lib/ProfileData/PerfConverter/LLVMBuild.txt +++ lib/ProfileData/PerfConverter/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/ProfileData/LLVMBuild.txt --------------------------*- Conf -*--===; +;===- ./lib/ProfileData/PerfConverter/LLVMBuild.txt ------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,6 +17,6 @@ [component_0] type = Library -name = ProfileData -parent = Libraries -required_libraries = Support Object +name = PerfConverter +parent = ProfileData +required_libraries = Support Index: lib/ProfileData/PerfConverter/Makefile =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/Makefile @@ -0,0 +1,25 @@ +##===- lib/ProfileData/PerfConverter/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMPerfConverter +BUILD_ARCHIVE := 1 + +SOURCES = addr2line.cc instruction_map.cc llvm_profile_writer.cc profile.cc \ + profile_creator.cc profile_writer.cc source_info.cc symbol_map.cc \ + sample_reader.cc quipper/address_mapper.cc quipper/perf_reader.cc \ + quipper/perf_parser.cc quipper/utils.cc symbolize/addr2line_inlinestack.cc \ + symbolize/bytereader.cc symbolize/dwarf2reader.cc symbolize/dwarf3ranges.cc \ + symbolize/elf_reader.cc symbolize/functioninfo.cc + +include $(LEVEL)/Makefile.common + +# For libquipper's use of zero-sized arrays. Need to use CompileCommonOpts +# because -pedantic is specified there. +CompileCommonOpts += -Wno-pedantic -I$(LLVM_SRC_ROOT)/lib/ProfileData/PerfConverter Index: lib/ProfileData/PerfConverter/addr2line.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/addr2line.h @@ -0,0 +1,67 @@ +//=-- addr2line.h - Class to derive inline stack ----------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_ADDR2LINE_H_ +#define AUTOFDO_ADDR2LINE_H_ + +#include +#include + +#include "llvm/Support/DataTypes.h" + +#include "source_info.h" + +namespace autofdo { +class ElfReader; +class InlineStackHandler; +class LineIdentifier; +} + +namespace samplepgo { +class Addr2line { +public: + explicit Addr2line(const string &binary_name) : binary_name_(binary_name) {} + + virtual ~Addr2line() {} + + static Addr2line *Create(const string &binary_name); + + static Addr2line * + CreateWithSampledFunctions(const string &binary_name, + const map *sampled_functions); + + // Reads the binary to prepare necessary binary in data. + // Returns True on success. + virtual bool Prepare() = 0; + + // Stores the inline stack of ADDR in STACK. 
+ virtual void GetInlineStack(uint64_t addr, SourceStack *stack) const = 0; + +protected: + string binary_name_; +}; + +typedef map AddressToLineMap; + +class Google3Addr2line : public Addr2line { +public: + explicit Google3Addr2line(const string &binary_name, + const map *sampled_functions); + virtual ~Google3Addr2line(); + virtual bool Prepare(); + virtual void GetInlineStack(uint64_t address, SourceStack *stack) const; + +private: + AddressToLineMap *line_map_; + autofdo::InlineStackHandler *inline_stack_handler_; + autofdo::ElfReader *elf_; + const map *sampled_functions_; +}; +} // namespace samplepgo + +#endif // AUTOFDO_ADDR2LINE_H_ Index: lib/ProfileData/PerfConverter/addr2line.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/addr2line.cpp @@ -0,0 +1,215 @@ +//=-- addr2line.cc - Class to derive inline stack ---------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include + +#include "llvm/Support/raw_ostream.h" + +#include "addr2line.h" +#include "symbolize/bytereader.h" +#include "symbolize/dwarf2reader.h" +#include "symbolize/dwarf3ranges.h" +#include "symbolize/addr2line_inlinestack.h" +#include "symbolize/functioninfo.h" +#include "symbolize/elf_reader.h" +#include "symbol_map.h" + +namespace { +void GetSection(const autofdo::SectionMap §ions, const char *section_name, + const char **data_p, size_t *size_p, const string &file_name, + const char *error) { + const char *data; + size_t size; + + autofdo::SectionMap::const_iterator section = sections.find(section_name); + + if (section == sections.end()) { + data = NULL; + size = 0; + } else { + data = section->second.first; + size = section->second.second; + } + + if (data_p) + *data_p = data; + if (size_p) + *size_p = size; +} +} // namespace + +namespace samplepgo { + +Addr2line *Addr2line::Create(const string &binary_name) { + return CreateWithSampledFunctions(binary_name, NULL); +} + +Addr2line *Addr2line::CreateWithSampledFunctions( + const string &binary_name, const map *sampled_functions) { + Addr2line *addr2line = new Google3Addr2line(binary_name, sampled_functions); + if (!addr2line->Prepare()) { + delete addr2line; + return NULL; + } else { + return addr2line; + } +} + +Google3Addr2line::Google3Addr2line(const string &binary_name, + const map *sampled_functions) + : Addr2line(binary_name), line_map_(new AddressToLineMap()), + inline_stack_handler_(NULL), elf_(new autofdo::ElfReader(binary_name)), + sampled_functions_(sampled_functions) {} + +Google3Addr2line::~Google3Addr2line() { + delete line_map_; + delete elf_; + if (inline_stack_handler_) { + delete inline_stack_handler_; + } +} + +bool Google3Addr2line::Prepare() { + autofdo::ByteReader reader(autofdo::ENDIANNESS_LITTLE); + int width; + if (elf_->IsElf32File()) { + width = 4; + } else if (elf_->IsElf64File()) { + width = 
8; + } else { + LOG(ERROR) << "'" << binary_name_ << "' is not an ELF file"; + return false; + } + reader.SetAddressSize(width); + + autofdo::SectionMap sections; + const char *debug_section_names[] = { + ".debug_line", ".debug_abbrev", ".debug_info", ".debug_line", + ".debug_str", ".debug_ranges", ".debug_addr"}; + for (const char *section_name : debug_section_names) { + size_t section_size; + const char *section_data = + elf_->GetSectionByName(section_name, §ion_size); + if (section_data == NULL) + continue; + sections[section_name] = make_pair(section_data, section_size); + } + + size_t debug_info_size = 0; + size_t debug_ranges_size = 0; + const char *debug_ranges_data = NULL; + GetSection(sections, ".debug_info", NULL, &debug_info_size, binary_name_, ""); + GetSection(sections, ".debug_ranges", &debug_ranges_data, &debug_ranges_size, + binary_name_, ""); + autofdo::AddressRangeList debug_ranges(debug_ranges_data, debug_ranges_size, + &reader); + inline_stack_handler_ = new autofdo::InlineStackHandler( + &debug_ranges, sections, &reader, sampled_functions_); + + // Extract the line information + // If .debug_info section is available, we will locate .debug_line using + // .debug_info. Otherwise, we'll iterate through .debug_line section, + // assuming that compilation units are stored continuously in it. 
+ if (debug_info_size > 0) { + size_t debug_info_pos = 0; + while (debug_info_pos < debug_info_size) { + autofdo::DirectoryVector dirs; + autofdo::FileVector files; + autofdo::CULineInfoHandler handler(&files, &dirs, line_map_, + sampled_functions_); + inline_stack_handler_->set_directory_names(&dirs); + inline_stack_handler_->set_file_names(&files); + inline_stack_handler_->set_line_handler(&handler); + autofdo::CompilationUnit compilation_unit(binary_name_, sections, + debug_info_pos, &reader, + inline_stack_handler_); + debug_info_pos += compilation_unit.Start(); + if (compilation_unit.malformed()) { + LOG(WARNING) << "File '" << binary_name_ << "' has mangled " + << ".debug_info section."; + // If the compilation unit is malformed, we do not know how + // big it is, so it is only safe to give up. + break; + } + } + } else { + const char *data; + size_t size; + GetSection(sections, ".debug_line", &data, &size, binary_name_, ""); + if (data) { + size_t pos = 0; + while (pos < size) { + autofdo::DirectoryVector dirs; + autofdo::FileVector files; + autofdo::CULineInfoHandler handler(&files, &dirs, line_map_); + autofdo::LineInfo line(data + pos, size - pos, &reader, &handler); + uint64 read = line.Start(); + if (line.malformed()) { + // If the debug_line section is malformed, we should stop + LOG(WARNING) << "File '" << binary_name_ << "' has mangled " + << ".debug_line section."; + break; + } + if (!read) + break; + pos += read; + } + } + } + inline_stack_handler_->PopulateSubprogramsByAddress(); + + return true; +} + +void Google3Addr2line::GetInlineStack(uint64 address, + SourceStack *stack) const { + AddressToLineMap::const_iterator iter = line_map_->upper_bound(address); + if (iter == line_map_->begin()) + return; + --iter; + if (iter->second.line == 0) + return; + + const autofdo::SubprogramInfo *subprog = + inline_stack_handler_->GetSubprogramForAddress(address); + + const char *function_name = NULL; + uint32 start_line = 0; + if (subprog != NULL) { + const 
autofdo::SubprogramInfo *declaration = + inline_stack_handler_->GetDeclaration(subprog); + function_name = declaration->name().c_str(); + start_line = + inline_stack_handler_->GetAbstractOrigin(subprog)->callsite_line(); + if (start_line == 0) + start_line = declaration->callsite_line(); + } + + stack->push_back(SourceInfo(function_name, iter->second.file.first, + iter->second.file.second, start_line, + iter->second.line, iter->second.discriminator)); + + while (subprog != NULL && subprog->inlined()) { + const autofdo::SubprogramInfo *canonical_parent = + inline_stack_handler_->GetDeclaration(subprog->parent()); + CHECK(subprog->parent() != NULL); + uint32 start_line = + inline_stack_handler_->GetAbstractOrigin(subprog->parent()) + ->callsite_line(); + if (start_line == 0) + start_line = canonical_parent->callsite_line(); + if (start_line == 0) + start_line = subprog->callsite_line(); + stack->push_back(SourceInfo( + canonical_parent->name().c_str(), subprog->callsite_directory(), + subprog->callsite_filename(), start_line, subprog->callsite_line(), + subprog->callsite_discr())); + subprog = subprog->parent(); + } +} +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/instruction_map.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/instruction_map.h @@ -0,0 +1,71 @@ +//=-- instruction_map.h - Map instruction addresses to location -------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_INSTRUCTION_MAP_H_ +#define AUTOFDO_INSTRUCTION_MAP_H_ + +#include +#include +#include +#include + +#include "symbol_map.h" + +namespace samplepgo { + +class SampleReader; +class Addr2line; + +// InstructionMap stores all the disassembled instructions in +// the binary, and maps it to its information. +class InstructionMap { +public: + // Arguments: + // addr2line: addr2line class, used to get the source stack. + // symbol: the symbol map. This object is not const because + // we will update the file name of each symbol + // according to the debug info of each instruction. + InstructionMap(Addr2line *addr2line, SymbolMap *symbol) + : symbol_map_(symbol), addr2line_(addr2line) {} + + // Deletes all the InstInfo, which was allocated in BuildInstMap. + ~InstructionMap(); + + // Returns the size of the instruction map. + uint64_t size() const { return inst_map_.size(); } + + // Builds instruction map for a function. + void BuildPerFunctionInstructionMap(const string &name, uint64_t start_addr, + uint64_t end_addr); + + // Contains information about each instruction. + struct InstInfo { + const SourceInfo &source(int i) const { + assert(i >= 0 && source_stack.size() > (unsigned)i); + return source_stack[i]; + } + SourceStack source_stack; + }; + + typedef map InstMap; + const InstMap &inst_map() const { return inst_map_; } + +private: + // A map from instruction address to its information. + InstMap inst_map_; + + // A map from symbol name to symbol data. + SymbolMap *symbol_map_; + + // Addr2line driver which is used to derive source stack. 
+ Addr2line *addr2line_; +}; + +} // namespace samplepgo + +#endif // AUTOFDO_INSTRUCTION_MAP_H_ Index: lib/ProfileData/PerfConverter/instruction_map.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/instruction_map.cpp @@ -0,0 +1,41 @@ +//=-- instruction_map.cc - Map instruction addresses to location ------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include + +#include "llvm/Support/DataTypes.h" + +#include "instruction_map.h" +#include "addr2line.h" +#include "symbol_map.h" + +namespace samplepgo { +InstructionMap::~InstructionMap() { + for (const auto &addr_info : inst_map_) { + delete addr_info.second; + } +} + +void InstructionMap::BuildPerFunctionInstructionMap(const string &name, + uint64_t start_addr, + uint64_t end_addr) { + if (start_addr >= end_addr) { + return; + } + for (uint64_t addr = start_addr; addr < end_addr; addr++) { + InstInfo *info = new InstInfo(); + addr2line_->GetInlineStack(addr, &info->source_stack); + inst_map_.insert(InstMap::value_type(addr, info)); + if (info->source_stack.size() > 0) { + symbol_map_->AddSourceCount(name, info->source_stack, 0, 1, + SymbolMap::MAX); + } + } +} + +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/llvm_port.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/llvm_port.h @@ -0,0 +1,109 @@ +//===- llvm_port.h - Definitios to facilitate porting Quipper ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains definitions needed to facilitate porting Quipper and +// other AutoFDO functionality from https://github.com/google/autofdo. +// +//===----------------------------------------------------------------------===// +#ifndef __LLVM_PORT_H +#define __LLVM_PORT_H + +#include + +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/raw_ostream.h" + +using namespace std; +using namespace llvm; + +typedef uint64_t uint64; +typedef uint32_t uint32; +typedef uint16_t uint16; +typedef uint8_t uint8; + +typedef int64_t int64; +typedef int32_t int32; +typedef int8_t int8; + +static const uint64 kuint64max = ((uint64)0xFFFFFFFFFFFFFFFFLL); +static const int64 kint64max = ((int64)0x7FFFFFFFFFFFFFFFLL); + +/* Silence all logging statements from quipper. */ +#define LOG(X) nulls() +#define PLOG(X) LOG(X) +#define DLOG(X) LOG(X) +#define VLOG(X) LOG(X) + +#if LANG_CXX11 +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName &) = delete; \ + void operator=(const TypeName &) = delete +#else +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName &); \ + void operator=(const TypeName &) +#endif + +#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName) + +namespace { +class raw_fatal_ostream : public raw_ostream { +public: + raw_fatal_ostream() : raw_ostream() {} + ~raw_fatal_ostream() { flush(); } +protected: + void write_impl(const char *Ptr, size_t Size) override { + ::fwrite(Ptr, Size, 1, stderr); + ::fputc('\n', stderr); + ::abort(); + } + + uint64_t current_pos() const override { + return ::ftell(stderr); + } +}; + +raw_fatal_ostream &fatals() { + static raw_fatal_ostream S; + return S; +} + +#define CHECK(x) (!(x) ? 
fatals() : nulls()) << #x +#define DCHECK(x) CHECK(x) +#define CHECK_GT(x, y) CHECK((x) > (y)) +#define CHECK_GE(x, y) CHECK((x) >= (y)) +#define CHECK_LT(x, y) CHECK((x) < (y)) +#define CHECK_LE(x, y) CHECK((x) <= (y)) +#define CHECK_NE(x, y) CHECK((x) != (y)) +#define CHECK_EQ(x, y) CHECK((x) == (y)) +} + +#if defined(__clang__) && defined(LANG_CXX11) && defined(__has_warning) +#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") +#define FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT +#endif +#endif + +#ifndef FALLTHROUGH_INTENDED +#define FALLTHROUGH_INTENDED \ + do { \ + } while (0) +#endif + +template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N]; + +#ifndef COMPILER_MSVC +template <typename T, size_t N> char (&ArraySizeHelper(const T (&array)[N]))[N]; +#endif + +#define arraysize(array) (sizeof(ArraySizeHelper(array))) + +#endif // __LLVM_PORT_H Index: lib/ProfileData/PerfConverter/llvm_profile_writer.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/llvm_profile_writer.cpp @@ -0,0 +1,144 @@ +//=-- llvm_profile_writer.cc - Convert a Perf profile to LLVM ---------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/raw_ostream.h" + +#include "profile_writer.h" + +using namespace llvm; + +static cl::opt + DebugDump("debug-dump", + cl::desc("If set, emit additional debugging dumps to stderr.")); + +namespace samplepgo { + +class LLVMSourceProfileWriter : public SymbolTraverser { +public: + static void Write(FILE *outf, const SymbolMap &symbol_map, + const StringIndexMap &map) { + LLVMSourceProfileWriter writer(outf, map); + writer.Start(symbol_map); + } + +protected: + void WriteSourceLocation(uint32_t start_line, uint32_t offset) { + if (offset & 0xffff) { + fprintf(outf_, "%u.%u: ", (offset >> 16) + start_line, offset & 0xffff); + } else { + fprintf(outf_, "%u: ", (offset >> 16) + start_line); + } + } + + virtual void Visit(const Symbol *node) { + // Sort sample locations by line number. + vector positions; + for (const auto &pos_count : node->pos_counts) { + // Do not waste storage writing 0 counts. + if (pos_count.second.count == 0) { + continue; + } + positions.push_back(pos_count.first); + } + + // Similarly, do not waste time emitting profiles for + // functions that have no counts in them. + if (positions.empty()) + return; + + // Clang does not generate a name for the implicit ctor of anonymous + // structs, so there won't be a name to attach the samples to. If + // the name of this function is empty, ignore it. + if (strlen(node->info.func_name) == 0) + return; + + // We have samples inside the function body. Write them out. + sort(positions.begin(), positions.end()); + fprintf(outf_, "%s:%llu:%llu\n", node->info.func_name, + static_cast(node->total_count), + static_cast(node->head_count)); + + // Emit all the locations and their sample counts. 
+ for (const auto &pos : positions) { + PositionCountMap::const_iterator ret = node->pos_counts.find(pos); + assert(ret != node->pos_counts.end()); + WriteSourceLocation(node->info.start_line, pos); + fprintf(outf_, "%llu", + static_cast<unsigned long long>(ret->second.count)); + + // If there is a call at this location, emit the possible + // targets. For direct calls, this will be the exact function + // being invoked. For indirect calls, this will be a list of one + // or more functions. + TargetCountPairs target_count_pairs; + GetSortedTargetCountPairs(ret->second.target_map, &target_count_pairs); + for (const auto &target_count : target_count_pairs) { + fprintf(outf_, " %s:%llu", target_count.first.c_str(), + static_cast<unsigned long long>(target_count.second)); + } + fprintf(outf_, "\n"); + } + } + +private: + explicit LLVMSourceProfileWriter(FILE *outf, const StringIndexMap &map) + : map_(map), outf_(outf) {} + + int GetStringIndex(const string &str) { + StringIndexMap::const_iterator ret = map_.find(str); + assert(ret != map_.end()); + return ret->second; + } + + const StringIndexMap &map_; + FILE *outf_; +}; + +FILE *LLVMProfileWriter::WriteHeader(const string &output_filename) { + FILE *outf = + (output_filename == "-") ? stdout : fopen(output_filename.c_str(), "w"); + if (!outf) { + errs() << "Could not open " << output_filename << " for writing.\n"; + return NULL; + } + return outf; +} + +void LLVMProfileWriter::WriteProfile() { + // Populate the symbol table. This table contains all the symbols + // for functions found in the binary. 
+ StringIndexMap string_index_map; + StringTableUpdater::Update(symbol_map_, &string_index_map); + LLVMSourceProfileWriter::Write(outf_, symbol_map_, string_index_map); +} + +void LLVMProfileWriter::WriteFinish() { fclose(outf_); } + +bool LLVMProfileWriter::WriteToFile(const string &output_filename) { + if (DebugDump) + Dump(); + outf_ = WriteHeader(output_filename); + if (!outf_) + return false; + WriteProfile(); + WriteFinish(); + return true; +} + +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/profile.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/profile.h @@ -0,0 +1,84 @@ +//=-- profile.h - Function-level profile extractor --------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Class to extract function level profile from binary level samples. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_PROFILE_H_ +#define AUTOFDO_PROFILE_H_ + +#include +#include +#include +#include + +#include "llvm/ProfileData/SampleProfileReader.h" +#include "llvm/Support/DataTypes.h" + +namespace samplepgo { + +class Addr2line; +class SymbolMap; + +// Class to convert instruction level profile to source level profile. +class Profile { +public: + // Arguments: + // sample_reader: the sample reader provides the instruction level profile, + // i.e. map from instruction/callstack to profile counts. + // binary_name: the binary file name. + // addr2line: an Addr2line. + // symbol_map: the symbol map is written by this class to store all symbol + // information. 
+ Profile(const std::unique_ptr<SampleReader> &sample_reader, + const string &binary_name, Addr2line *addr2line, + SymbolMap *symbol_map) + : sample_reader_(sample_reader), binary_name_(binary_name), + addr2line_(addr2line), symbol_map_(symbol_map) {} + + ~Profile(); + + // Builds the source level profile. + void ComputeProfile(); + +private: + // Internal data structure that aggregates profile for each symbol. + struct ProfileMaps { + ProfileMaps(uint64_t start, uint64_t end) : start_addr(start), end_addr(end) {} + uint64_t GetAggregatedCount() const; + uint64_t start_addr; + uint64_t end_addr; + AddressCountMap address_count_map; + RangeCountMap range_count_map; + BranchCountMap branch_count_map; + }; + typedef map<string, ProfileMaps *> SymbolProfileMaps; + + // Returns the profile maps for a given function. + ProfileMaps *GetProfileMaps(uint64_t addr); + + // Aggregates raw profile for each symbol. + void AggregatePerFunctionProfile(); + + // Builds function level profile for specified function: + // 1. Traverses all instructions to build instruction map. + // 2. Unwinds the inline stack to add symbol count to each inlined symbol. + void ProcessPerFunctionProfile(string func_name, const ProfileMaps &map); + + const std::unique_ptr<SampleReader> &sample_reader_; + const string binary_name_; + Addr2line *addr2line_; + SymbolMap *symbol_map_; + AddressCountMap global_addr_count_map_; + SymbolProfileMaps symbol_profile_maps_; +}; + +} // namespace samplepgo + +#endif // AUTOFDO_PROFILE_H_ Index: lib/ProfileData/PerfConverter/profile.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/profile.cpp @@ -0,0 +1,194 @@ +//=-- profile.cc - Class to represent source level profile -----------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include "profile.h" + +#include +#include +#include +#include + +#include "instruction_map.h" +#include "symbol_map.h" + +#include "llvm/ProfileData/SampleProfileReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" + +using namespace llvm; + +static cl::opt UseLBR( + "use-lbr", + cl::desc("True if the profile was collected with LBR tracking enabled. Use " + "the -b option to 'perf record' to enable LBR tracking."), + cl::init(true)); + +static cl::opt SampleThreshold( + "sample-threshold", + cl::desc("Sample threshold divider. The threshold of total function count " + "is determined by max_sample_count/sample_threshold."), + cl::init(50000)); + +namespace samplepgo { +Profile::ProfileMaps *Profile::GetProfileMaps(uint64_t addr) { + const string *name; + uint64_t start_addr, end_addr; + if (symbol_map_->GetSymbolInfoByAddr(addr, &name, &start_addr, &end_addr)) { + pair ret = + symbol_profile_maps_.insert(SymbolProfileMaps::value_type(*name, NULL)); + if (ret.second) { + ret.first->second = new ProfileMaps(start_addr, end_addr); + } + return ret.first->second; + } else { + return NULL; + } +} + +void Profile::AggregatePerFunctionProfile() { + uint64_t start = symbol_map_->base_addr(); + const AddressCountMap *count_map = &sample_reader_->getAddressCountMap(); + for (const auto &addr_count : *count_map) { + ProfileMaps *maps = GetProfileMaps(addr_count.first + start); + if (maps != NULL) { + maps->address_count_map[addr_count.first + start] += addr_count.second; + } + } + const RangeCountMap *range_map = &sample_reader_->getRangeCountMap(); + for (const auto &range_count : *range_map) { + ProfileMaps *maps = GetProfileMaps(range_count.first.first + start); + if (maps != NULL) { + maps->range_count_map[make_pair(range_count.first.first + start, + range_count.first.second + start)] += + range_count.second; + } + } + const BranchCountMap *branch_map = 
&sample_reader_->getBranchCountMap(); + for (const auto &branch_count : *branch_map) { + ProfileMaps *maps = GetProfileMaps(branch_count.first.first + start); + if (maps != NULL) { + maps->branch_count_map[make_pair(branch_count.first.first + start, + branch_count.first.second + start)] += + branch_count.second; + } + } +} + +uint64_t Profile::ProfileMaps::GetAggregatedCount() const { + uint64_t ret = 0; + + if (range_count_map.size() > 0) { + for (const auto &range_count : range_count_map) { + ret += range_count.second; + } + } else { + for (const auto &addr_count : address_count_map) { + ret += addr_count.second; + } + } + return ret; +} + +void Profile::ProcessPerFunctionProfile(string func_name, + const ProfileMaps &maps) { + if (maps.GetAggregatedCount() <= + sample_reader_->getMaxCount() / SampleThreshold) { + return; + } + + symbol_map_->AddSymbol(func_name); + + InstructionMap inst_map(addr2line_, symbol_map_); + inst_map.BuildPerFunctionInstructionMap(func_name, maps.start_addr, + maps.end_addr); + + AddressCountMap map; + const AddressCountMap *map_ptr; + if (UseLBR) { + if (maps.range_count_map.size() == 0) { + return; + } + for (const auto &range_count : maps.range_count_map) { + for (InstructionMap::InstMap::const_iterator iter = + inst_map.inst_map().find(range_count.first.first); + iter != inst_map.inst_map().end() && + iter->first <= range_count.first.second; + ++iter) { + map[iter->first] += range_count.second; + } + } + map_ptr = ↦ + } else { + map_ptr = &maps.address_count_map; + } + + for (const auto &address_count : *map_ptr) { + InstructionMap::InstMap::const_iterator iter = + inst_map.inst_map().find(address_count.first); + if (iter == inst_map.inst_map().end()) { + continue; + } + const InstructionMap::InstInfo *info = iter->second; + if (info == NULL) { + continue; + } + const string *symbol; + if (!symbol_map_->GetSymbolInfoByAddr(address_count.first, &symbol, NULL, + NULL)) { + continue; + } + bool is_in_head = + 
symbol_map_->GetSymbolNameByStartAddr(address_count.first) != NULL; + if (is_in_head) { + symbol_map_->AddSymbolEntryCount(*symbol, address_count.second); + } + if (info->source_stack.size() > 0) { + symbol_map_->AddSourceCount(func_name, info->source_stack, + address_count.second, 0, SymbolMap::MAX); + } + } + + for (const auto &branch_count : maps.branch_count_map) { + InstructionMap::InstMap::const_iterator iter = + inst_map.inst_map().find(branch_count.first.first); + if (iter == inst_map.inst_map().end()) { + continue; + } + const InstructionMap::InstInfo *info = iter->second; + if (info == NULL) { + continue; + } + const string *callee = + symbol_map_->GetSymbolNameByStartAddr(branch_count.first.second); + if (!callee) { + continue; + } + symbol_map_->AddIndirectCallTarget(func_name, info->source_stack, *callee, + branch_count.second); + } + + for (const auto &addr_count : *map_ptr) { + global_addr_count_map_[addr_count.first] = addr_count.second; + } +} + +void Profile::ComputeProfile() { + AggregatePerFunctionProfile(); + // Traverse the symbol map to process the profiles. + for (const auto &symbol_profile : symbol_profile_maps_) { + ProcessPerFunctionProfile(symbol_profile.first, *symbol_profile.second); + } + symbol_map_->Merge(); +} + +Profile::~Profile() { + for (auto &symbol_maps : symbol_profile_maps_) { + delete symbol_maps.second; + } +} +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/profile_creator.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/profile_creator.cpp @@ -0,0 +1,102 @@ +//=-- profile_creator.cc - Create a SamplePGO Profile -----------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include + +#include "llvm/ProfileData/SampleProfileConverter.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +#include "addr2line.h" +#include "profile.h" +#include "profile_writer.h" +#include "symbol_map.h" +#include "symbolize/elf_reader.h" + +using namespace llvm; + +namespace samplepgo { +bool SampleProfileConverter::createProfile(const string &InputProfileName, + InputProfileKind Profiler, + const string &OutputProfileName) { + if (!readSample(InputProfileName, Profiler)) { + return false; + } + if (!createProfileFromSample(OutputProfileName)) { + return false; + } + return true; +} + +bool SampleProfileConverter::readSample(const string &InputProfileName, + InputProfileKind Profiler) { + switch (Profiler) { + case LinuxPerf: { + // Sets the regular expression to filter samples for a given binary. + char *dup_name = strdup(Binary.str().c_str()); + char *strip_ptr = strstr(dup_name, ".unstripped"); + if (strip_ptr) { + *strip_ptr = 0; + } + const char *file_base_name = basename(dup_name); + CHECK(file_base_name) << "Cannot find basename for: " << Binary; + + autofdo::ElfReader reader(Binary); + + Reader.reset( + new PerfDataSampleReader(InputProfileName, file_base_name)); + free(dup_name); + break; + } + + case Unknown: + llvm_unreachable("Invalid input profiler kind"); + } + + if (!Reader->readAndSetMaxCount()) { + errs() << "Error reading profile.\n"; + return false; + } + + return true; +} + +bool SampleProfileConverter::createProfileFromSample( + const string &output_profile_name) { + SymbolMap symbol_map(Binary); + set sampled_addrs = Reader->getSampledAddresses(); + + map sampled_functions = + symbol_map.GetSampledSymbolStartAddressSizeMap(sampled_addrs); + Addr2line *addr2line = + Addr2line::CreateWithSampledFunctions(Binary, &sampled_functions); + + if (addr2line == NULL) { + LOG(ERROR) << "Error reading binary " << Binary; + 
return false; + } + + Profile profile(Reader, Binary, addr2line, &symbol_map); + profile.ComputeProfile(); + + LLVMProfileWriter writer(symbol_map); + bool ret = writer.WriteToFile(output_profile_name); + + delete addr2line; + + return ret; +} + +uint64_t SampleProfileConverter::getTotalSamples() { + if (!Reader) + return 0; + + return Reader->getTotalSampleCount(); +} + +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/profile_writer.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/profile_writer.h @@ -0,0 +1,151 @@ +//=-- profile_writer.h - Class to build an LLVM sample profile --------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef SAMPLEPGO_PROFILE_WRITER_H_ +#define SAMPLEPGO_PROFILE_WRITER_H_ + +#include "symbol_map.h" + +namespace samplepgo { + +class SymbolMap; + +class ProfileWriter { +public: + explicit ProfileWriter(const SymbolMap &symbol_map) + : symbol_map_(symbol_map) {} + virtual ~ProfileWriter() {} + + virtual bool WriteToFile(const string &output_file) = 0; + void Dump(); + +protected: + const SymbolMap &symbol_map_; +}; + +class SymbolTraverser { +public: + virtual ~SymbolTraverser() {} + +protected: + SymbolTraverser() {} + virtual void Start(const SymbolMap &symbol_map) { + for (const auto &name_symbol : symbol_map.map()) { + if (name_symbol.second->total_count == 0) { + continue; + } + VisitTopSymbol(name_symbol.first, name_symbol.second); + Traverse(name_symbol.second); + } + } + virtual void VisitTopSymbol(const string &name, const Symbol *node) {} + virtual void Visit(const Symbol *node) = 0; + virtual void VisitCallsite(const Callsite &offset) {} + +private: + void Traverse(const Symbol *node) { + Visit(node); + for (const auto &callsite_symbol 
: node->callsites) { + VisitCallsite(callsite_symbol.first); + Traverse(callsite_symbol.second); + } + } +}; + +typedef map StringIndexMap; + +class StringTableUpdater : public SymbolTraverser { +public: + static void Update(const SymbolMap &symbol_map, StringIndexMap *map) { + StringTableUpdater updater(map); + updater.Start(symbol_map); + } + +protected: + virtual void Visit(const Symbol *node) { + (*map_)[node->info.func_name ? node->info.func_name : string()] = 0; + for (const auto &pos_count : node->pos_counts) { + for (const auto &name_count : pos_count.second.target_map) { + (*map_)[name_count.first] = 0; + } + } + } + + virtual void VisitTopSymbol(const string &name, const Symbol *node) { + (*map_)[name] = 0; + } + +private: + explicit StringTableUpdater(StringIndexMap *map) : map_(map) {} + StringIndexMap *map_; +}; + +// Writer class for LLVM profiles. This writes a text file with a +// simple frequency-based profile. +class LLVMProfileWriter : public ProfileWriter { +public: + explicit LLVMProfileWriter(const SymbolMap &symbol_map) + : ProfileWriter(symbol_map) {} + + virtual bool WriteToFile(const string &output_filename); + +private: + // Open the output file and write its header. + FILE *WriteHeader(const string &output_filename); + + // Write the body of the profile in text format. + // + // function1:total_samples:total_head_samples + // offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] + // offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] + // ... + // offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] + // + // Function names must be mangled in order for the profile loader to + // match them in the current translation unit. The two numbers in the + // function header specify how many total samples were accumulated in + // the function (first number), and the total number of samples accumulated + // at the prologue of the function (second number). 
This head sample + // count provides an indicator of how frequent is the function invoked. + // + // Each sampled line may contain several items. Some are optional + // (marked below): + // + // a- Source line offset. This number represents the line number + // in the function where the sample was collected. The line number + // is always relative to the line where symbol of the function + // is defined. So, if the function has its header at line 280, + // the offset 13 is at line 293 in the file. + // + // b- [OPTIONAL] Discriminator. This is used if the sampled program + // was compiled with DWARF discriminator support + // (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators) + // + // c- Number of samples. This is the number of samples collected by + // the profiler at this source location. + // + // d- [OPTIONAL] Potential call targets and samples. If present, this + // line contains a call instruction. This models both direct and + // indirect calls. Each called target is listed together with the + // number of samples. For example, + // + // 130: 7 foo:3 bar:2 baz:7 + // + // The above means that at relative line offset 130 there is a + // call instruction that calls one of foo(), bar() and baz(). With + // baz() being the relatively more frequent call target. + void WriteProfile(); + + // Close the profile file and flush out any trailing data. + void WriteFinish(); + + FILE *outf_; +}; +} // namespace samplepgo + +#endif // SAMPLEPGO_PROFILE_WRITER_H_ Index: lib/ProfileData/PerfConverter/profile_writer.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/profile_writer.cpp @@ -0,0 +1,185 @@ +//=-- profile_writer.cc - Write the converted profile to a sample profile ---=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include +#include +#include +#include +#include +#include +#include +#include + +#include "symbol_map.h" +#include "profile_writer.h" + +namespace samplepgo { + +class SourceProfileLengther : public SymbolTraverser { +public: + explicit SourceProfileLengther(const SymbolMap &symbol_map) + : length_(0), num_functions_(0) { + Start(symbol_map); + } + + int length() { return length_ + num_functions_ * 2; } + int num_functions() { return num_functions_; } + +protected: + virtual void VisitTopSymbol(const string &name, const Symbol *node) { + num_functions_++; + } + + virtual void Visit(const Symbol *node) { + // func_name, num_pos_counts, num_callsites + length_ += 3; + // offset_discr, num_targets, count * 2 + length_ += node->pos_counts.size() * 4; + // offset_discr + length_ += node->callsites.size(); + for (const auto &pos_count : node->pos_counts) { + // type, func_name * 2, count * 2 + length_ += pos_count.second.target_map.size() * 5; + } + } + +private: + int length_; + int num_functions_; +}; + +// Debugging support. ProfileDumper emits a detailed dump of the contents +// of the input profile. 
+class ProfileDumper : public SymbolTraverser {
+public:
+  static void Write(const SymbolMap &symbol_map, const StringIndexMap &map) {
+    ProfileDumper writer(map);
+    writer.Start(symbol_map);
+  }
+
+protected:
+  // Prints one SourceInfo record, indented |indent| columns.
+  void DumpSourceInfo(SourceInfo info, int indent) {
+    printf("%*sDirectory name: %s\n", indent, " ", info.dir_name);
+    printf("%*sFile name: %s\n", indent, " ", info.file_name);
+    printf("%*sFunction name: %s\n", indent, " ", info.func_name);
+    printf("%*sStart line: %u\n", indent, " ", info.start_line);
+    printf("%*sLine: %u\n", indent, " ", info.line);
+    printf("%*sDiscriminator: %u\n", indent, " ", info.discriminator);
+  }
+
+  // Prints "line[.discriminator]: " where the line offset lives in the high
+  // 16 bits of |offset| and the discriminator in the low 16 bits.
+  void PrintSourceLocation(uint32_t start_line, uint32_t offset) {
+    if (offset & 0xffff) {
+      printf("%u.%u: ", (offset >> 16) + start_line, offset & 0xffff);
+    } else {
+      printf("%u: ", (offset >> 16) + start_line);
+    }
+  }
+
+  virtual void Visit(const Symbol *node) {
+    printf("Writing symbol: ");
+    node->Dump(4);
+    printf("\n");
+    printf("Source information:\n");
+    DumpSourceInfo(node->info, 0);
+    printf("\n");
+    printf("Total sampled count: %llu\n",
+           static_cast<unsigned long long>(node->total_count));
+    printf("Total sampled count in head bb: %llu\n",
+           static_cast<unsigned long long>(node->head_count));
+    printf("\n");
+    printf("Call sites:\n");
+    int i = 0;
+    for (const auto &callsite_symbol : node->callsites) {
+      Callsite site = callsite_symbol.first;
+      Symbol *symbol = callsite_symbol.second;
+      printf("  #%d: site\n", i);
+      printf("    uint32_t: %u\n", site.first);
+      printf("    const char *: %s\n", site.second);
+      printf("  #%d: symbol: ", i);
+      symbol->Dump(0);
+      printf("\n");
+      i++;
+    }
+
+    printf("node->pos_counts.size() = %llu\n",
+           static_cast<unsigned long long>(node->pos_counts.size()));
+    printf("node->callsites.size() = %llu\n",
+           static_cast<unsigned long long>(node->callsites.size()));
+    // Dump positions in sorted order for a deterministic, readable listing.
+    vector<uint32_t> positions;
+    for (const auto &pos_count : node->pos_counts)
+      positions.push_back(pos_count.first);
+    sort(positions.begin(), positions.end());
+    i = 0;
+    for (const auto &pos : positions) {
+      PositionCountMap::const_iterator pos_count = node->pos_counts.find(pos);
+      assert(pos_count != node->pos_counts.end());
+      uint32_t location = pos_count->first;
+      ProfileInfo info = pos_count->second;
+
+      printf("#%d: location (line[.discriminator]) = ", i);
+      PrintSourceLocation(node->info.start_line, location);
+      printf("\n");
+      printf("#%d: profile info execution count = %llu\n", i,
+             static_cast<unsigned long long>(info.count));
+      printf("#%d: profile info number of instructions = %llu\n", i,
+             static_cast<unsigned long long>(info.num_inst));
+      TargetCountPairs target_counts;
+      GetSortedTargetCountPairs(info.target_map, &target_counts);
+      printf("#%d: profile info target map size = %llu\n", i,
+             static_cast<unsigned long long>(info.target_map.size()));
+      printf("#%d: info.target_map:\n", i);
+      for (const auto &target_count : info.target_map) {
+        printf("\tGetStringIndex(target_count.first): %d\n",
+               GetStringIndex(target_count.first));
+        printf("\ttarget_count.second: %llu\n",
+               static_cast<unsigned long long>(target_count.second));
+      }
+      printf("\n");
+      i++;
+    }
+  }
+
+  virtual void VisitTopSymbol(const string &name, const Symbol *node) {
+    printf("VisitTopSymbol: %s\n", name.c_str());
+    node->Dump(0);
+    printf("node->head_count: %llu\n",
+           static_cast<unsigned long long>(node->head_count));
+    // GetStringIndex returns int, so use %d (the original %u mismatched).
+    printf("GetStringIndex(%s): %d\n", name.c_str(), GetStringIndex(name));
+    printf("\n");
+  }
+
+  virtual void VisitCallsite(const Callsite &callsite) {
+    printf("VisitCallSite: %s\n", callsite.second);
+    printf("callsite.first: %u\n", callsite.first);
+    printf("GetStringIndex(callsite.second): %d\n",
+           GetStringIndex(callsite.second ? callsite.second : string()));
+  }
+
+private:
+  explicit ProfileDumper(const StringIndexMap &map) : map_(map) {}
+
+  int GetStringIndex(const string &str) {
+    StringIndexMap::const_iterator ret = map_.find(str);
+    assert(ret != map_.end());
+    return ret->second;
+  }
+
+  const StringIndexMap &map_;
+};
+
+// Emit a dump of the input profile on stdout.
+void ProfileWriter::Dump() { + StringIndexMap string_index_map; + StringTableUpdater::Update(symbol_map_, &string_index_map); + SourceProfileLengther length(symbol_map_); + printf("Length of symbol map: %d\n", length.length() + 1); + printf("Number of functions: %d\n", length.num_functions()); + ProfileDumper::Write(symbol_map_, string_index_map); +} + +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/quipper/address_mapper.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/address_mapper.h @@ -0,0 +1,122 @@ +//=-- address_mapper.h ------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef QUIPPER_ADDRESS_MAPPER_ +#define QUIPPER_ADDRESS_MAPPER_ + +#include + +#include "llvm_port.h" + +namespace quipper { + +class AddressMapper { + public: + AddressMapper() {} + + // Copy constructor: copies mappings from |source| to this AddressMapper. This + // is useful for copying mappings from parent to child process upon fork(). It + // is also useful to copy kernel mappings to any process that is created. + AddressMapper(const AddressMapper& source); + + // Maps a new address range to quipper space. + // |remove_existing_mappings| indicates whether to remove old mappings that + // collide with the new range in real address space, indicating it has been + // unmapped. + // Returns true if mapping was successful. + bool Map(const uint64 real_addr, + const uint64 length, + bool remove_existing_mappings); + + // Like Map(real_addr, length, remove_existing_mappings). |id| is an + // identifier value to be stored along with the mapping. AddressMapper does + // not care whether it is unique compared to all other IDs passed in. 
That is + // up to the caller to keep track of. + bool MapWithID(const uint64 real_addr, + const uint64 length, + const uint64 id, + bool remove_existing_mappings); + + // Looks up |real_addr| and returns the mapped address. + bool GetMappedAddress(const uint64 real_addr, uint64* mapped_addr) const; + + // Looks up |real_addr| and returns the mapping's ID and offset from the + // start of the mapped space. + bool GetMappedIDAndOffset(const uint64 real_addr, + uint64* id, + uint64* offset) const; + + // Returns true if there are no mappings. + bool IsEmpty() const { + return mappings_.empty(); + } + + // Returns the number of address ranges that are currently mapped. + unsigned int GetNumMappedRanges() const { + return mappings_.size(); + } + + // Returns the maximum length of quipper space containing mapped areas. + // There may be gaps in between blocks. + // If the result is 2^64 (all of quipper space), this returns 0. Call + // IsEmpty() to distinguish this from actual emptiness. + uint64 GetMaxMappedLength() const; + + // Dumps the state of the address mapper to logs. Useful for debugging. + void DumpToLog() const; + + private: + struct MappedRange { + uint64 real_addr; + uint64 mapped_addr; + uint64 size; + + uint64 id; + + // Length of unmapped space after this range. + uint64 unmapped_space_after; + + // Determines if this range intersects another range in real space. + inline bool Intersects(const MappedRange& range) const { + return (real_addr <= range.real_addr + range.size - 1) && + (real_addr + size - 1 >= range.real_addr); + } + + // Determines if this range fully covers another range in real space. + inline bool Covers(const MappedRange& range) const { + return (real_addr <= range.real_addr) && + (real_addr + size - 1 >= range.real_addr + range.size - 1); + } + + // Determines if this range fully contains another range in real space. + // This is different from Covers() in that the boundaries cannot overlap. 
+ inline bool Contains(const MappedRange& range) const { + return (real_addr < range.real_addr) && + (real_addr + size - 1 > range.real_addr + range.size - 1); + } + + // Determines if this range contains the given address |addr|. + inline bool ContainsAddress(uint64 addr) const { + return (addr >= real_addr && addr <= real_addr + size - 1); + } + }; + + // TODO(sque): implement with set or map to improve searching. + typedef std::list MappingList; + + // Removes an existing address mapping. + // Returns true if successful, false if no mapped address range was found. + bool Unmap(const MappedRange& range); + + // Container for all the existing mappings. + MappingList mappings_; +}; + +} // namespace quipper + +#endif // QUIPPER_ADDRESS_MAPPER_ Index: lib/ProfileData/PerfConverter/quipper/address_mapper.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/address_mapper.cc @@ -0,0 +1,210 @@ +//=-- address_mapper.cc -----------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+#include "llvm_port.h"
+#include "address_mapper.h"
+
+namespace quipper {
+
+AddressMapper::AddressMapper(const AddressMapper& source) {
+  mappings_ = source.mappings_;
+}
+
+bool AddressMapper::Map(const uint64 real_addr,
+                        const uint64 size,
+                        const bool remove_existing_mappings) {
+  // kuint64max is used as the "no ID" sentinel.
+  return MapWithID(real_addr, size, kuint64max, remove_existing_mappings);
+}
+
+bool AddressMapper::MapWithID(const uint64 real_addr,
+                              const uint64 size,
+                              const uint64 id,
+                              bool remove_existing_mappings) {
+  MappedRange range;
+  range.real_addr = real_addr;
+  range.size = size;
+  range.id = id;
+
+  if (size == 0) {
+    LOG(ERROR) << "Must allocate a nonzero-length address range.";
+    return false;
+  }
+
+  // Check that this mapping does not overflow the address space.
+  if (real_addr + size - 1 != kuint64max &&
+      !(real_addr + size > real_addr)) {
+    DumpToLog();
+    LOG(ERROR) << "Address mapping at " << real_addr
+               << " with size " << size << " overflows.";
+    return false;
+  }
+
+  // Check for collision with an existing mapping. This must be an overlap that
+  // does not result in one range being completely covered by another.
+  MappingList::iterator iter;
+  MappingList mappings_to_delete;
+  bool old_range_found = false;
+  MappedRange old_range;
+  for (iter = mappings_.begin(); iter != mappings_.end(); ++iter) {
+    if (!iter->Intersects(range))
+      continue;
+    // Quit if existing ranges that collide aren't supposed to be removed.
+    if (!remove_existing_mappings)
+      return false;
+    // A single larger range covering the new one is remembered (to be split
+    // below) rather than deleted outright.
+    if (!old_range_found && iter->Covers(range) && iter->size > range.size) {
+      old_range_found = true;
+      old_range = *iter;
+      continue;
+    }
+    mappings_to_delete.push_back(*iter);
+  }
+
+  while (!mappings_to_delete.empty()) {
+    // NOTE: this |range| intentionally shadows the function-local |range|
+    // above within this loop body.
+    const MappedRange& range = mappings_to_delete.front();
+    CHECK(Unmap(range));
+    mappings_to_delete.pop_front();
+  }
+
+  // Otherwise check for this range being covered by another range. If that
+  // happens, split or reduce the existing range to make room.
+  if (old_range_found) {
+    CHECK(Unmap(old_range));
+
+    uint64 gap_before = range.real_addr - old_range.real_addr;
+    uint64 gap_after = (old_range.real_addr + old_range.size) -
+                       (range.real_addr + range.size);
+
+    if (gap_before)
+      CHECK(MapWithID(old_range.real_addr, gap_before, old_range.id, false));
+
+    CHECK(MapWithID(range.real_addr, range.size, id, false));
+
+    if (gap_after) {
+      CHECK(MapWithID(
+          range.real_addr + range.size, gap_after, old_range.id, false));
+    }
+
+    return true;
+  }
+
+  // Now search for a location for the new range. It should be in the first
+  // free block in quipper space.
+
+  // If there is no existing mapping, add it to the beginning of quipper space.
+  if (mappings_.empty()) {
+    range.mapped_addr = 0;
+    range.unmapped_space_after = kuint64max - range.size;
+    mappings_.push_back(range);
+    return true;
+  }
+
+  // If there is space before the first mapped range in quipper space, use it.
+  if (mappings_.begin()->mapped_addr >= range.size) {
+    range.mapped_addr = 0;
+    range.unmapped_space_after = mappings_.begin()->mapped_addr - range.size;
+    mappings_.push_front(range);
+    return true;
+  }
+
+  // Otherwise, search through the existing mappings for a free block after one
+  // of them.
+  for (iter = mappings_.begin(); iter != mappings_.end(); ++iter) {
+    if (iter->unmapped_space_after < range.size)
+      continue;
+
+    range.mapped_addr = iter->mapped_addr + iter->size;
+    range.unmapped_space_after = iter->unmapped_space_after - range.size;
+    iter->unmapped_space_after = 0;
+
+    mappings_.insert(++iter, range);
+    return true;
+  }
+
+  // If it still hasn't succeeded in mapping, it means there is no free space in
+  // quipper space large enough for a mapping of this size.
+  DumpToLog();
+  LOG(ERROR) << "Could not find space to map addr=" << real_addr
+             << " with size " << size;
+  return false;
+}
+
+void AddressMapper::DumpToLog() const {
+  MappingList::const_iterator it;
+  for (it = mappings_.begin(); it != mappings_.end(); ++it) {
+    LOG(INFO) << " real_addr: " << it->real_addr
+              << " mapped: " << it->mapped_addr
+              << " id: " << it->id
+              << " size: " << it->size;
+  }
+}
+
+bool AddressMapper::GetMappedAddress(const uint64 real_addr,
+                                     uint64* mapped_addr) const {
+  CHECK(mapped_addr);
+  MappingList::const_iterator iter;
+  for (iter = mappings_.begin(); iter != mappings_.end(); ++iter) {
+    if (!iter->ContainsAddress(real_addr))
+      continue;
+    *mapped_addr = iter->mapped_addr + real_addr - iter->real_addr;
+    return true;
+  }
+  return false;
+}
+
+bool AddressMapper::GetMappedIDAndOffset(const uint64 real_addr,
+                                         uint64* id,
+                                         uint64* offset) const {
+  CHECK(id);
+  CHECK(offset);
+  MappingList::const_iterator iter;
+  for (iter = mappings_.begin(); iter != mappings_.end(); ++iter) {
+    if (!iter->ContainsAddress(real_addr))
+      continue;
+    *id = iter->id;
+    *offset = real_addr - iter->real_addr;
+    return true;
+  }
+  return false;
+}
+
+uint64 AddressMapper::GetMaxMappedLength() const {
+  if (IsEmpty())
+    return 0;
+
+  uint64 min = mappings_.begin()->mapped_addr;
+
+  MappingList::const_iterator iter = mappings_.end();
+  --iter;
+  uint64 max = iter->mapped_addr + iter->size;
+
+  return max - min;
+}
+
+bool AddressMapper::Unmap(const MappedRange& range) {
+  MappingList::iterator iter;
+  // TODO(sque): this is highly inefficient since Unmap() is called from a
+  // function that has already iterated to the right place within |mappings_|.
+  // For a first revision, I am sacrificing efficiency for clarity, due to
+  // the trickiness of removing elements using iterators.
+ for (iter = mappings_.begin(); iter != mappings_.end(); ++iter) { + if (range.real_addr == iter->real_addr && range.size == iter->size) { + // Add the freed up space to the free space counter of the previous + // mapped region, if it exists. + if (iter != mappings_.begin()) { + --iter; + iter->unmapped_space_after += range.size + range.unmapped_space_after; + ++iter; + } + mappings_.erase(iter); + return true; + } + } + return false; +} + +} // namespace quipper Index: lib/ProfileData/PerfConverter/quipper/kernel/COPYING =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/kernel/COPYING @@ -0,0 +1,351 @@ + + NOTE! This copyright does *not* cover user programs that use kernel + services by normal system calls - this is merely considered normal use + of the kernel, and does *not* fall under the heading of "derived work". + Also note that the GPL below is copyrighted by the Free Software + Foundation, but the instance of code that it refers to (the linux + kernel) is copyrighted by me and others who actually wrote it. + + Linus Torvalds + +---------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) 
You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. 
+ + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
Index: lib/ProfileData/PerfConverter/quipper/kernel/perf_event.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/kernel/perf_event.h @@ -0,0 +1,1365 @@ +/* + * Performance events: + * + * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_EVENT_H +#define _LINUX_PERF_EVENT_H + +#include +#include +#include + +/* + * User-space ABI bits: + */ + +/* + * attr.type + */ +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, + + PERF_TYPE_MAX, /* non-ABI */ +}; + +/* + * Generalized performance event event_id types, used by the + * attr.event_id parameter of the sys_perf_event_open() + * syscall: + */ +enum perf_hw_id { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, + PERF_COUNT_HW_REF_CPU_CYCLES = 9, + + PERF_COUNT_HW_MAX, /* non-ABI */ +}; + +/* + * Generalized hardware cache events: + * + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + PERF_COUNT_HW_CACHE_NODE = 6, + + 
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, + + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ +}; + +/* + * Special "software" events provided by the kernel, even if the hardware + * does not support performance events. These events measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, + + PERF_COUNT_SW_MAX, /* non-ABI */ +}; + +/* + * Bits that can be set in attr.sample_type to request information + * in the overflow packets. + */ +enum perf_event_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_READ = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_RAW = 1U << 10, + PERF_SAMPLE_BRANCH_STACK = 1U << 11, + + PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ +}; + +/* + * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * + * If the user does not pass priv level information via branch_sample_type, + * the kernel uses the event's priv level. Branch and event priv levels do + * not have to match. Branch priv level is checked for permissions. 
+ * + * The branch types can be combined, however BRANCH_ANY covers all types + * of branches and therefore it supersedes all the other types. + */ +enum perf_branch_sample_type { + PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ + + PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ +}; + +#define PERF_SAMPLE_BRANCH_PLM_ALL \ + (PERF_SAMPLE_BRANCH_USER|\ + PERF_SAMPLE_BRANCH_KERNEL|\ + PERF_SAMPLE_BRANCH_HV) + +/* + * The format of the data returned by read() on a perf event fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; + */ +enum perf_event_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, + + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ +}; + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ +#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ + +/* + * Hardware event_id to monitor via a performance monitoring event: + */ +struct perf_event_attr { + + /* + * Major type: hardware/software/tracepoint/etc. 
+ */ + __u32 type; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; + + /* + * Type specific configuration information. + */ + __u64 config; + + union { + __u64 sample_period; + __u64 sample_freq; + }; + + __u64 sample_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ + watermark : 1, /* wakeup_watermark */ + /* + * precise_ip: + * + * 0 - SAMPLE_IP can have arbitrary skid + * 1 - SAMPLE_IP must have constant skid + * 2 - SAMPLE_IP requested to have 0 skid + * 3 - SAMPLE_IP must have 0 skid + * + * See also PERF_RECORD_MISC_EXACT_IP + */ + precise_ip : 2, /* skid constraint */ + mmap_data : 1, /* non-exec mmap data */ + sample_id_all : 1, /* sample_type all events */ + + exclude_host : 1, /* don't count in host */ + exclude_guest : 1, /* don't count in guest */ + + __reserved_1 : 43; + + union { + __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_watermark; /* bytes before wakeup */ + }; + + __u32 bp_type; + union { + __u64 bp_addr; + __u64 config1; /* extension of config */ + }; + union { + __u64 bp_len; + __u64 config2; /* extension of config1 */ + }; + __u64 branch_sample_type; /* enum branch_sample_type */ +}; + +/* + * Ioctls that can be done on a perf event fd: + */ +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD 
_IOW('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) + +enum perf_event_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; + +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_event_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw events in user-space. + * + * u32 seq, time_mult, time_shift, idx, width; + * u64 count, enabled, running; + * u64 cyc, time_offset; + * s64 pmc = 0; + * + * do { + * seq = pc->lock; + * barrier() + * + * enabled = pc->time_enabled; + * running = pc->time_running; + * + * if (pc->cap_usr_time && enabled != running) { + * cyc = rdtsc(); + * time_offset = pc->time_offset; + * time_mult = pc->time_mult; + * time_shift = pc->time_shift; + * } + * + * idx = pc->index; + * count = pc->offset; + * if (pc->cap_usr_rdpmc && idx) { + * width = pc->pmc_width; + * pmc = rdpmc(idx - 1); + * } + * + * barrier(); + * } while (pc->lock != seq); + * + * NOTE: for obvious reason this only works on self-monitoring + * processes. + */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware event identifier */ + __s64 offset; /* add to hardware event value */ + __u64 time_enabled; /* time event active */ + __u64 time_running; /* time event on cpu */ + union { + __u64 capabilities; + __u64 cap_usr_time : 1, + cap_usr_rdpmc : 1, + cap_____res : 62; + }; + + /* + * If cap_usr_rdpmc this field provides the bit-width of the value + * read using the rdpmc() or equivalent instruction. This can be used + * to sign extend the result like: + * + * pmc <<= 64 - width; + * pmc >>= 64 - width; // signed shift right + * count += pmc; + */ + __u16 pmc_width; + + /* + * If cap_usr_time the below fields can be used to compute the time + * delta since time_enabled (in ns) using rdtsc or similar. 
+ * + * u64 quot, rem; + * u64 delta; + * + * quot = (cyc >> time_shift); + * rem = cyc & ((1 << time_shift) - 1); + * delta = time_offset + quot * time_mult + + * ((rem * time_mult) >> time_shift); + * + * Where time_offset,time_mult,time_shift and cyc are read in the + * seqcount loop described above. This delta can then be added to + * enabled and possible running (if idx), improving the scaling: + * + * enabled += delta; + * if (idx) + * running += delta; + * + * quot = count / running; + * rem = count % running; + * count = quot * enabled + (rem * enabled) / running; + */ + __u16 time_shift; + __u32 time_mult; + __u64 time_offset; + + /* + * Hole for extension of the self monitor capabilities + */ + + __u64 __reserved[120]; /* align to 1k */ + + /* + * Control data for the mmap() data buffer. + * + * User-space reading the @data_head value should issue an rmb(), on + * SMP capable platforms, after reading this value -- see + * perf_event_wakeup(). + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data. In this case + * the kernel will not over-write unread data. + */ + __u64 data_head; /* head in the data section */ + __u64 data_tail; /* user-space written tail */ +}; + +#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (2 << 0) +#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) +#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) +#define PERF_RECORD_MISC_GUEST_USER (5 << 0) + +/* + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. 
+ */ +#define PERF_RECORD_MISC_EXACT_IP (1 << 14) +/* + * Reserve the last bit to indicate some extended misc field + */ +#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) + +struct perf_event_header { + __u32 type; + __u16 misc; + __u16 size; +}; + +enum perf_event_type { + + /* + * If perf_event_attr.sample_id_all is set then all event types will + * have the sample_type selected fields related to where/when + * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID) + * described in PERF_RECORD_SAMPLE below, it will be stashed just after + * the perf_event_header and the fields already present for the existing + * fields, i.e. at the end of the payload. That way a newer perf.data + * file will be supported by older perf tools, with these new optional + * fields being ignored. + * + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ + PERF_RECORD_MMAP = 1, + + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * }; + */ + PERF_RECORD_LOST = 2, + + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_RECORD_COMM = 3, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * }; + */ + PERF_RECORD_EXIT = 4, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * u64 stream_id; + * }; + */ + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * }; + */ + PERF_RECORD_FORK = 7, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * + * struct read_format values; + * }; + */ + PERF_RECORD_READ = 8, + + /* + * struct { + * 
struct perf_event_header header; + * + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD + * + * { struct read_format values; } && PERF_SAMPLE_READ + * + * { u64 nr, + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. + * # + * + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW + * + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * }; + */ + PERF_RECORD_SAMPLE = 9, + + PERF_RECORD_MAX, /* non-ABI */ +}; + +enum perf_callchain_context { + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, + + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, + + PERF_CONTEXT_MAX = (__u64)-4095, +}; + +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) +#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ + +#ifdef __KERNEL__ +/* + * Kernel-internal data types and definitions: + */ + +#ifdef CONFIG_PERF_EVENTS +# include +# include +# include +#endif + +struct perf_guest_info_callbacks { + int (*is_in_guest)(void); + int (*is_user_mode)(void); + unsigned long (*get_guest_ip)(void); +}; + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define 
PERF_MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + __u64 nr; + __u64 ip[PERF_MAX_STACK_DEPTH]; +}; + +struct perf_raw_record { + u32 size; + void *data; +}; + +/* + * single taken branch record layout: + * + * from: source instruction (may not always be a branch insn) + * to: branch target + * mispred: branch target was mispredicted + * predicted: branch target was predicted + * + * support for mispred, predicted is optional. In case it + * is not supported mispred = predicted = 0. + */ +struct perf_branch_entry { + __u64 from; + __u64 to; + __u64 mispred:1, /* target mispredicted */ + predicted:1,/* target predicted */ + reserved:62; +}; + +/* + * branch stack layout: + * nr: number of taken branches stored in entries[] + * + * Note that nr can vary from sample to sample + * branches (to, from) are stored from most recent + * to least recent, i.e., entries[0] contains the most + * recent branch. + */ +struct perf_branch_stack { + __u64 nr; + struct perf_branch_entry entries[0]; +}; + +struct task_struct; + +/* + * extra PMU register associated with an event + */ +struct hw_perf_event_extra { + u64 config; /* register value */ + unsigned int reg; /* register address or index */ + int alloc; /* extra register already allocated */ + int idx; /* index in shared_regs->regs[] */ +}; + +/** + * struct hw_perf_event - performance event hardware details: + */ +struct hw_perf_event { +#ifdef CONFIG_PERF_EVENTS + union { + struct { /* hardware */ + u64 config; + u64 last_tag; + unsigned long config_base; + unsigned long event_base; + int idx; + int last_cpu; + + struct hw_perf_event_extra extra_reg; + struct hw_perf_event_extra branch_reg; + }; + struct { /* software */ + struct hrtimer hrtimer; + }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + struct { /* breakpoint */ + struct arch_hw_breakpoint info; + struct list_head bp_list; + /* + * Crufty hack to avoid the chicken and egg + * problem hw_breakpoint has with context + * creation and event initalization. 
+ */ + struct task_struct *bp_target; + }; +#endif + }; + int state; + local64_t prev_count; + u64 sample_period; + u64 last_period; + local64_t period_left; + u64 interrupts_seq; + u64 interrupts; + + u64 freq_time_stamp; + u64 freq_count_stamp; +#endif +}; + +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + +struct perf_event; + +/* + * Common implementation detail of pmu::{start,commit,cancel}_txn + */ +#define PERF_EVENT_TXN 0x1 + +/** + * struct pmu - generic performance monitoring unit + */ +struct pmu { + struct list_head entry; + + struct device *dev; + const struct attribute_group **attr_groups; + char *name; + int type; + + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; + + /* + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. + */ + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ + + /* + * Try and initialize the event for this PMU. + * Should return -ENOENT when the @event doesn't match this PMU. + */ + int (*event_init) (struct perf_event *event); + +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ + + /* + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. + */ + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + + /* + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. 
+ */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ + void (*read) (struct perf_event *event); + + /* + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group + * + * Start the transaction, after this ->add() doesn't need to + * do schedulability tests. + */ + void (*start_txn) (struct pmu *pmu); /* optional */ + /* + * If ->start_txn() disabled the ->add() schedulability test + * then ->commit_txn() is required to perform one. On success + * the transaction is closed. On error the transaction is kept + * open until ->cancel_txn() is called. + */ + int (*commit_txn) (struct pmu *pmu); /* optional */ + /* + * Will cancel the transaction, assumes ->del() is called + * for each successful ->add() during the transaction. + */ + void (*cancel_txn) (struct pmu *pmu); /* optional */ + + /* + * Will return the value for perf_event_mmap_page::index for this event, + * if no implementation is provided it will default to: event->hw.idx + 1. 
+ */ + int (*event_idx) (struct perf_event *event); /*optional */ + + /* + * flush branch stack on context-switches (needed in cpu-wide mode) + */ + void (*flush_branch_stack) (void); +}; + +/** + * enum perf_event_active_state - the states of a event + */ +enum perf_event_active_state { + PERF_EVENT_STATE_ERROR = -2, + PERF_EVENT_STATE_OFF = -1, + PERF_EVENT_STATE_INACTIVE = 0, + PERF_EVENT_STATE_ACTIVE = 1, +}; + +struct file; +struct perf_sample_data; + +typedef void (*perf_overflow_handler_t)(struct perf_event *, + struct perf_sample_data *, + struct pt_regs *regs); + +enum perf_group_flag { + PERF_GROUP_SOFTWARE = 0x1, +}; + +#define SWEVENT_HLIST_BITS 8 +#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) + +struct swevent_hlist { + struct hlist_head heads[SWEVENT_HLIST_SIZE]; + struct rcu_head rcu_head; +}; + +#define PERF_ATTACH_CONTEXT 0x01 +#define PERF_ATTACH_GROUP 0x02 +#define PERF_ATTACH_TASK 0x04 + +#ifdef CONFIG_CGROUP_PERF +/* + * perf_cgroup_info keeps track of time_enabled for a cgroup. + * This is a per-cpu dynamically allocated data structure. + */ +struct perf_cgroup_info { + u64 time; + u64 timestamp; +}; + +struct perf_cgroup { + struct cgroup_subsys_state css; + struct perf_cgroup_info *info; /* timing info, one per cpu */ +}; +#endif + +struct ring_buffer; + +/** + * struct perf_event - performance event kernel representation: + */ +struct perf_event { +#ifdef CONFIG_PERF_EVENTS + struct list_head group_entry; + struct list_head event_entry; + struct list_head sibling_list; + struct hlist_node hlist_entry; + int nr_siblings; + int group_flags; + struct perf_event *group_leader; + struct pmu *pmu; + + enum perf_event_active_state state; + unsigned int attach_state; + local64_t count; + atomic64_t child_count; + + /* + * These are the total time in nanoseconds that the event + * has been enabled (i.e. eligible to run, and the task has + * been scheduled in, if this is a per-task event) + * and running (scheduled onto the CPU), respectively. 
+ * + * They are computed from tstamp_enabled, tstamp_running and + * tstamp_stopped when the event is in INACTIVE or ACTIVE state. + */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the event is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the event was enabled + * tstamp_running: the notional time when the event was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * event was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + + /* + * timestamp shadows the actual context timing but it can + * be safely used in NMI interrupt context. It reflects the + * context time as it was when the event was last scheduled in. + * + * ctx_time already accounts for ctx->timestamp. Therefore to + * compute ctx_time for a sample, simply add perf_clock(). + */ + u64 shadow_ctx_time; + + struct perf_event_attr attr; + u16 header_size; + u16 id_header_size; + u16 read_size; + struct hw_perf_event hw; + + struct perf_event_context *ctx; + struct file *filp; + + /* + * These accumulate total time (in nanoseconds) that children + * events have been enabled and running, respectively. 
+ */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + + /* + * Protect attach/detach and child_list: + */ + struct mutex child_mutex; + struct list_head child_list; + struct perf_event *parent; + + int oncpu; + int cpu; + + struct list_head owner_entry; + struct task_struct *owner; + + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + int mmap_locked; + struct user_struct *mmap_user; + struct ring_buffer *rb; + struct list_head rb_entry; + + /* poll related */ + wait_queue_head_t waitq; + struct fasync_struct *fasync; + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_kill; + int pending_disable; + struct irq_work pending; + + atomic_t event_limit; + + void (*destroy)(struct perf_event *); + struct rcu_head rcu_head; + + struct pid_namespace *ns; + u64 id; + + perf_overflow_handler_t overflow_handler; + void *overflow_handler_context; + +#ifdef CONFIG_EVENT_TRACING + struct ftrace_event_call *tp_event; + struct event_filter *filter; +#ifdef CONFIG_FUNCTION_TRACER + struct ftrace_ops ftrace_ops; +#endif +#endif + +#ifdef CONFIG_CGROUP_PERF + struct perf_cgroup *cgrp; /* cgroup event is attach to */ + int cgrp_defer_enabled; +#endif + +#endif /* CONFIG_PERF_EVENTS */ +}; + +enum perf_event_context_type { + task_context, + cpu_context, +}; + +/** + * struct perf_event_context - event context structure + * + * Used as a container for task events and CPU events as well: + */ +struct perf_event_context { + struct pmu *pmu; + enum perf_event_context_type type; + /* + * Protect the states of the events in the list, + * nr_active, and the list: + */ + raw_spinlock_t lock; + /* + * Protect the list of events. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. 
+ */ + struct mutex mutex; + + struct list_head pinned_groups; + struct list_head flexible_groups; + struct list_head event_list; + int nr_events; + int nr_active; + int is_active; + int nr_stat; + int nr_freq; + int rotate_disable; + atomic_t refcount; + struct task_struct *task; + + /* + * Context clock, runs when context enabled. + */ + u64 time; + u64 timestamp; + + /* + * These fields let us detect when two contexts have both + * been cloned (inherited) from a common ancestor. + */ + struct perf_event_context *parent_ctx; + u64 parent_gen; + u64 generation; + int pin_count; + int nr_cgroups; /* cgroup evts */ + int nr_branch_stack; /* branch_stack evt */ + struct rcu_head rcu_head; +}; + +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + +/** + * struct perf_event_cpu_context - per cpu event context structure + */ +struct perf_cpu_context { + struct perf_event_context ctx; + struct perf_event_context *task_ctx; + int active_oncpu; + int exclusive; + struct list_head rotation_list; + int jiffies_interval; + struct pmu *active_pmu; + struct perf_cgroup *cgrp; +}; + +struct perf_output_handle { + struct perf_event *event; + struct ring_buffer *rb; + unsigned long wakeup; + unsigned long size; + void *addr; + int page; +}; + +#ifdef CONFIG_PERF_EVENTS + +extern int perf_pmu_register(struct pmu *pmu, char *name, int type); +extern void perf_pmu_unregister(struct pmu *pmu); + +extern int perf_num_counters(void); +extern const char *perf_pmu_name(void); +extern void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task); +extern void __perf_event_task_sched_out(struct task_struct *prev, + struct task_struct *next); +extern int perf_event_init_task(struct task_struct *child); +extern void perf_event_exit_task(struct task_struct *child); +extern void perf_event_free_task(struct task_struct *task); +extern void perf_event_delayed_put(struct task_struct *task); +extern void 
perf_event_print_debug(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); +extern int perf_event_task_disable(void); +extern int perf_event_task_enable(void); +extern int perf_event_refresh(struct perf_event *event, int refresh); +extern void perf_event_update_userpage(struct perf_event *event); +extern int perf_event_release_kernel(struct perf_event *event); +extern struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, + int cpu, + struct task_struct *task, + perf_overflow_handler_t callback, + void *context); +extern u64 perf_event_read_value(struct perf_event *event, + u64 *enabled, u64 *running); + + +struct perf_sample_data { + u64 type; + + u64 ip; + struct { + u32 pid; + u32 tid; + } tid_entry; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + struct { + u32 cpu; + u32 reserved; + } cpu_entry; + u64 period; + struct perf_callchain_entry *callchain; + struct perf_raw_record *raw; + struct perf_branch_stack *br_stack; +}; + +static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +{ + data->addr = addr; + data->raw = NULL; + data->br_stack = NULL; +} + +extern void perf_output_sample(struct perf_output_handle *handle, + struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event); +extern void perf_prepare_sample(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs); + +extern int perf_event_overflow(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); + +static inline bool is_sampling_event(struct perf_event *event) +{ + return event->attr.sample_period != 0; +} + +/* + * Return 1 for a software event, 0 for a hardware event + */ +static inline int is_software_event(struct perf_event *event) +{ + return event->pmu->task_ctx_nr == perf_sw_context; +} + +extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; + 
+extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); + +#ifndef perf_arch_fetch_caller_regs +static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } +#endif + +/* + * Take a snapshot of the regs. Skip ip and frame pointer to + * the nth caller. We only need a few of the regs: + * - ip for PERF_SAMPLE_IP + * - cs for user_mode() tests + * - bp for callchains + * - eflags, for future purposes, just in case + */ +static inline void perf_fetch_caller_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(*regs)); + + perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); +} + +static __always_inline void +perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) +{ + struct pt_regs hot_regs; + + if (static_key_false(&perf_swevent_enabled[event_id])) { + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; + } + __perf_sw_event(event_id, nr, regs, addr); + } +} + +extern struct static_key_deferred perf_sched_events; + +static inline void perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) +{ + if (static_key_false(&perf_sched_events.key)) + __perf_event_task_sched_in(prev, task); +} + +static inline void perf_event_task_sched_out(struct task_struct *prev, + struct task_struct *next) +{ + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); + + if (static_key_false(&perf_sched_events.key)) + __perf_event_task_sched_out(prev, next); +} + +extern void perf_event_mmap(struct vm_area_struct *vma); +extern struct perf_guest_info_callbacks *perf_guest_cbs; +extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); +extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); + +extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_fork(struct task_struct *tsk); + +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct 
perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); + +static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + +extern int sysctl_perf_event_paranoid; +extern int sysctl_perf_event_mlock; +extern int sysctl_perf_event_sample_rate; + +extern int perf_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +static inline bool perf_paranoid_tracepoint_raw(void) +{ + return sysctl_perf_event_paranoid > -1; +} + +static inline bool perf_paranoid_cpu(void) +{ + return sysctl_perf_event_paranoid > 0; +} + +static inline bool perf_paranoid_kernel(void) +{ + return sysctl_perf_event_paranoid > 1; +} + +extern void perf_event_init(void); +extern void perf_tp_event(u64 addr, u64 count, void *record, + int entry_size, struct pt_regs *regs, + struct hlist_head *head, int rctx); +extern void perf_bp_event(struct perf_event *event, void *data); + +#ifndef perf_misc_flags +# define perf_misc_flags(regs) \ + (user_mode(regs) ? 
PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) +# define perf_instruction_pointer(regs) instruction_pointer(regs) +#endif + +static inline bool has_branch_stack(struct perf_event *event) +{ + return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; +} + +extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size); +extern void perf_output_end(struct perf_output_handle *handle); +extern void perf_output_copy(struct perf_output_handle *handle, + const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); +extern void perf_event_enable(struct perf_event *event); +extern void perf_event_disable(struct perf_event *event); +extern void perf_event_task_tick(void); +#else +static inline void +perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { } +static inline void +perf_event_task_sched_out(struct task_struct *prev, + struct task_struct *next) { } +static inline int perf_event_init_task(struct task_struct *child) { return 0; } +static inline void perf_event_exit_task(struct task_struct *child) { } +static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } +static inline void perf_event_print_debug(void) { } +static inline int perf_event_task_disable(void) { return -EINVAL; } +static inline int perf_event_task_enable(void) { return -EINVAL; } +static inline int perf_event_refresh(struct perf_event *event, int refresh) +{ + return -EINVAL; +} + +static inline void +perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } +static inline void +perf_bp_event(struct perf_event *event, void *data) { } + +static inline int perf_register_guest_info_callbacks +(struct perf_guest_info_callbacks *callbacks) { return 0; } +static inline int perf_unregister_guest_info_callbacks +(struct perf_guest_info_callbacks 
*callbacks) { return 0; } + +static inline void perf_event_mmap(struct vm_area_struct *vma) { } +static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_fork(struct task_struct *tsk) { } +static inline void perf_event_init(void) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } +static inline void perf_event_enable(struct perf_event *event) { } +static inline void perf_event_disable(struct perf_event *event) { } +static inline void perf_event_task_tick(void) { } +#endif + +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) + +/* + * This has to have a higher priority than migration_notifier in sched.c. + */ +#define perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ + fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_STARTING, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ + (void *)(unsigned long)smp_processor_id()); \ + register_cpu_notifier(&fn##_nb); \ +} while (0) + + +#define PMU_FORMAT_ATTR(_name, _format) \ +static ssize_t \ +_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ + \ +static struct device_attribute format_attr_##_name = __ATTR_RO(_name) + +#endif /* __KERNEL__ */ +#endif /* _LINUX_PERF_EVENT_H */ Index: lib/ProfileData/PerfConverter/quipper/kernel/perf_internals.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/kernel/perf_internals.h @@ -0,0 +1,306 @@ +// Copied from kernel sources. See COPYING for license details. 
+
+#ifndef PERF_INTERNALS_H_
+#define PERF_INTERNALS_H_
+
+#include <linux/limits.h>
+#include <stdint.h>
+
+#include "perf_event.h"
+
+namespace quipper {
+
+// These typedefs are from tools/perf/util/types.h in the kernel.
+typedef uint64_t u64;
+typedef int64_t s64;
+typedef unsigned int u32;
+typedef signed int s32;
+typedef unsigned short u16;
+typedef signed short s16;
+typedef unsigned char u8;
+typedef signed char s8;
+
+#define BITS_PER_BYTE 8
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+#define MAX_EVENT_NAME 64
+
+// These data structures have been copied from the kernel. See files under
+// tools/perf/util.
+
+enum {
+ HEADER_RESERVED = 0, /* always cleared */
+ HEADER_FIRST_FEATURE = 1,
+ HEADER_TRACE_INFO = 1,
+ HEADER_BUILD_ID,
+
+ HEADER_HOSTNAME,
+ HEADER_OSRELEASE,
+ HEADER_VERSION,
+ HEADER_ARCH,
+ HEADER_NRCPUS,
+ HEADER_CPUDESC,
+ HEADER_CPUID,
+ HEADER_TOTAL_MEM,
+ HEADER_CMDLINE,
+ HEADER_EVENT_DESC,
+ HEADER_CPU_TOPOLOGY,
+ HEADER_NUMA_TOPOLOGY,
+ HEADER_BRANCH_STACK,
+ HEADER_LAST_FEATURE,
+ HEADER_FEAT_BITS = 256,
+};
+
+/* pseudo samples injected by perf-inject */
+enum perf_user_event_type { /* above any possible kernel type */
+ PERF_RECORD_USER_TYPE_START = 64,
+ PERF_RECORD_HEADER_ATTR = 64,
+ PERF_RECORD_HEADER_EVENT_TYPE = 65,
+ PERF_RECORD_HEADER_TRACING_DATA = 66,
+ PERF_RECORD_HEADER_BUILD_ID = 67,
+ PERF_RECORD_FINISHED_ROUND = 68,
+ PERF_RECORD_HEADER_HOSTNAME = 69,
+ PERF_RECORD_HEADER_OSRELEASE = 70,
+ PERF_RECORD_HEADER_VERSION = 71,
+ PERF_RECORD_HEADER_ARCH = 72,
+ PERF_RECORD_HEADER_NRCPUS = 73,
+ PERF_RECORD_HEADER_CPUDESC = 74,
+ PERF_RECORD_HEADER_CPUID = 75,
+ PERF_RECORD_HEADER_TOTAL_MEM = 76,
+ PERF_RECORD_HEADER_CMDLINE = 77,
+ PERF_RECORD_HEADER_EVENT_DESC = 78,
+ PERF_RECORD_HEADER_CPU_TOPOLOGY = 79,
+ PERF_RECORD_HEADER_NUMA_TOPOLOGY = 80,
+ PERF_RECORD_HEADER_PMU_MAPPINGS = 81,
+ PERF_RECORD_HEADER_MAX +}; + +struct perf_file_section { + u64 offset; + u64 size; +}; + +struct perf_file_attr { + struct perf_event_attr attr; + struct perf_file_section ids; +}; + +struct perf_trace_event_type { + u64 event_id; + char name[MAX_EVENT_NAME]; +}; + +struct perf_file_header { + u64 magic; + u64 size; + u64 attr_size; + struct perf_file_section attrs; + struct perf_file_section data; + struct perf_file_section event_types; + DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); +}; + +struct perf_pipe_file_header { + u64 magic; + u64 size; +}; + +struct attr_event { + struct perf_event_header header; + struct perf_event_attr attr; + uint64_t id[]; +}; + +struct event_type_event { + struct perf_event_header header; + struct perf_trace_event_type event_type; +}; + +struct event_desc_event { + struct perf_event_header header; + uint32_t num_events; + uint32_t event_header_size; + uint8_t more_data[]; +}; + +enum { + SHOW_KERNEL = 1, + SHOW_USER = 2, + SHOW_HV = 4, +}; + +/* + * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * + */ +struct ip_event { + struct perf_event_header header; + u64 ip; + u32 pid, tid; + unsigned char __more_data[]; +}; + +struct mmap_event { + struct perf_event_header header; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; + char filename[PATH_MAX]; +}; + +struct comm_event { + struct perf_event_header header; + u32 pid, tid; + char comm[16]; +}; + +struct fork_event { + struct perf_event_header header; + u32 pid, ppid; + u32 tid, ptid; + u64 time; +}; + +struct lost_event { + struct perf_event_header header; + u64 id; + u64 lost; +}; + +// This struct is found in comments in perf_event.h, and can be found as a +// struct in tools/perf/util/python.c in the kernel. 
+struct throttle_event { + struct perf_event_header header; + u64 time; + u64 id; + u64 stream_id; +}; + +/* + * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID + */ +struct read_event { + struct perf_event_header header; + u32 pid, tid; + u64 value; + u64 time_enabled; + u64 time_running; + u64 id; +}; + +struct sample_event{ + struct perf_event_header header; + u64 array[]; +}; + +// Taken from tools/perf/util/include/linux/kernel.h +#define ALIGN(x,a) __ALIGN_MASK(x,(__typeof__(x))(a)-1) +#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) + +// If this is changed, kBuildIDArraySize in perf_reader.h must also be changed. +#define BUILD_ID_SIZE 20 + +struct build_id_event { + struct perf_event_header header; + pid_t pid; + u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; + char filename[]; +}; + +#undef ALIGN +#undef __ALIGN_MASK +#undef BUILD_ID_SIZE + +// The addition of throttle_event is a custom addition for quipper. +// It is used for both THROTTLE and UNTHROTTLE events. +typedef union event_union { + struct perf_event_header header; + struct ip_event ip; + struct mmap_event mmap; + struct comm_event comm; + struct fork_event fork; + struct lost_event lost; + struct throttle_event throttle; + struct read_event read; + struct sample_event sample; + struct build_id_event build_id; +} event_t; + +struct ip_callchain { + u64 nr; + u64 ips[0]; +}; + +struct branch_flags { + u64 mispred:1; + u64 predicted:1; + u64 reserved:62; +}; + +struct branch_entry { + u64 from; + u64 to; + struct branch_flags flags; +}; + +struct branch_stack { + u64 nr; + struct branch_entry entries[0]; +}; + +// All the possible fields of a perf sample. This is not an actual data +// structure found in raw perf data, as each field may or may not be present in +// the data. +struct perf_sample { + u64 ip; + u32 pid, tid; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + u64 period; + u32 cpu; + struct { // Copied from struct read_event. 
+ u64 time_enabled;
+ u64 time_running;
+ u64 id;
+ } read;
+ u32 raw_size;
+ void *raw_data;
+ struct ip_callchain *callchain;
+ struct branch_stack *branch_stack;
+
+ perf_sample() : raw_data(NULL),
+ callchain(NULL),
+ branch_stack(NULL) {}
+ ~perf_sample() {
+ if (callchain) {
+ delete [] callchain;
+ callchain = NULL;
+ }
+ if (branch_stack) {
+ delete [] branch_stack;
+ branch_stack = NULL;
+ }
+ if (raw_data) {
+ delete [] reinterpret_cast<char*>(raw_data);
+ raw_data = NULL;
+ }
+ }
+};
+
+// End data structures copied from the kernel.
+
+#undef BITS_PER_BYTE
+#undef DIV_ROUND_UP
+#undef BITS_TO_LONGS
+#undef DECLARE_BITMAP
+#undef MAX_EVENT_NAME
+
+} // namespace quipper
+
+#endif /*PERF_INTERNALS_H_*/ Index: lib/ProfileData/PerfConverter/quipper/perf_parser.h
===================================================================
--- /dev/null
+++ lib/ProfileData/PerfConverter/quipper/perf_parser.h
@@ -0,0 +1,237 @@
+//=-- perf_parser.h ---------------------------------------------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef QUIPPER_PERF_PARSER_H_
+#define QUIPPER_PERF_PARSER_H_
+
+#include
+#include
+
+#include "perf_reader.h"
+
+namespace quipper {
+
+namespace {
+
+// By default, most samples must be properly mapped in order for sample mapping
+// to be considered successful.
+const float kDefaultSampleThreshold = 95.0f;
+
+} // namespace
+
+class AddressMapper;
+
+// A struct containing all relevant info for a mapped DSO, independent of any
+// samples.
+struct DSOInfo {
+ string name;
+ string build_id;
+
+ // Comparator that allows this to be stored in a STL set.
+ bool operator<(const DSOInfo& other) const { + if (name == other.name) + return build_id < other.build_id; + return name < other.name; + } +}; + +struct ParsedEvent { + // TODO(sque): Turn this struct into a class to privatize member variables. + ParsedEvent() : command_(NULL) {} + + // Stores address of the event pointer in |events_|. + // We store an event_t** instead of an event_t* to avoid having multiple + // copies of pointers returned by calloc. + event_t** raw_event; + + // For mmap events, use this to count the number of samples that are in this + // region. + uint32 num_samples_in_mmap_region; + + // Command associated with this sample. + const string* command_; + + // Accessor for command string. + const string command() const { + if (command_) + return *command_; + return string(); + } + + void set_command(const string& command) { + command_ = &command; + } + + // A struct that contains a DSO + offset pair. + struct DSOAndOffset { + const DSOInfo* dso_info_; + uint64 offset_; + + // Accessor methods. + const string dso_name() const { + if (dso_info_) + return dso_info_->name; + return string(); + } + const string build_id() const { + if (dso_info_) + return dso_info_->build_id; + return string(); + } + uint64 offset() const { + return offset_; + } + + DSOAndOffset() : dso_info_(NULL), + offset_(0) {} + } dso_and_offset; + + // DSO+offset info for callchain. + std::vector callchain; + + // DSO + offset info for branch stack entries. + struct BranchEntry { + bool predicted; + DSOAndOffset from; + DSOAndOffset to; + }; + std::vector branch_stack; +}; + +struct PerfEventStats { + // Number of each type of event. + uint32 num_sample_events; + uint32 num_mmap_events; + uint32 num_comm_events; + uint32 num_fork_events; + uint32 num_exit_events; + + // Number of sample events that were successfully mapped using the address + // mapper. 
The mapping is recorded regardless of whether the address in the + // perf sample event itself was assigned the remapped address. The latter is + // indicated by |did_remap|. + uint32 num_sample_events_mapped; + + // Whether address remapping was enabled during event parsing. + bool did_remap; +}; + +class PerfParser : public PerfReader { + public: + PerfParser(); + ~PerfParser(); + + struct Options { + // For synthetic address mapping. + bool do_remap; + // Set this flag to discard non-sample events that don't have any associated + // sample events. e.g. MMAP regions with no samples in them. + bool discard_unused_events; + // When mapping perf sample events, at least this percentage of them must be + // successfully mapped in order for ProcessEvents() to return true. + float sample_mapping_percentage_threshold; + + Options() : do_remap(false), + discard_unused_events(false), + sample_mapping_percentage_threshold(kDefaultSampleThreshold) {} + }; + + // Constructor that takes in options at PerfParser creation time. + PerfParser(const Options& options); + + // Pass in a struct containing various options. + void set_options(const Options& options); + + // Gets parsed event/sample info from raw event data. + bool ParseRawEvents(); + + const std::vector& parsed_events() const { + return parsed_events_; + } + + // Returns an array of pointers to |parsed_events_| sorted by sample time. + // The first time this is called, it will create the sorted array. + const std::vector& GetEventsSortedByTime() const { + return parsed_events_sorted_by_time_; + } + + const PerfEventStats& stats() const { + return stats_; + } + + protected: + // Defines a type for a pid:tid pair. + typedef std::pair PidTid; + + // Sort |parsed_events_| by time, storing the results in + // |parsed_events_sorted_by_time_|. + void SortParsedEvents(); + + // Used for processing events. e.g. remapping with synthetic addresses. 
+ bool ProcessEvents(); + bool MapMmapEvent(struct mmap_event* event, uint64 id); + bool MapForkEvent(const struct fork_event& event); + bool MapCommEvent(const struct comm_event& event); + + // Create a process mapper for a process. Optionally pass in a parent pid + // |ppid| from which to copy mappings. + void CreateProcessMapper(uint32 pid, uint32 ppid = -1); + + // Does a sample event remap and then returns DSO name and offset of sample. + bool MapSampleEvent(ParsedEvent* parsed_event); + + void ResetAddressMappers(); + + std::vector parsed_events_; + std::vector parsed_events_sorted_by_time_; + + Options options_; // Store all option flags as one struct. + + std::map process_mappers_; + + // Maps pid/tid to commands. + std::map pidtid_to_comm_map_; + + // A set to store the actual command strings. + std::set commands_; + + PerfEventStats stats_; + + // A set of unique DSOs that may be referenced by multiple events. + std::set dso_set_; + + private: + // Calls MapIPAndPidAndGetNameAndOffset() on the callchain of a sample event. + bool MapCallchain(const struct ip_event& event, + uint64 original_event_addr, + struct ip_callchain* callchain, + ParsedEvent* parsed_event); + + // Trims the branch stack for null entries and calls + // MapIPAndPidAndGetNameAndOffset() on each entry. + bool MapBranchStack(const struct ip_event& event, + struct branch_stack* branch_stack, + ParsedEvent* parsed_event); + + // This maps a sample event and returns the mapped address, DSO name, and + // offset within the DSO. This is a private function because the API might + // change in the future, and we don't want derived classes to be stuck with an + // obsolete API. 
+ bool MapIPAndPidAndGetNameAndOffset( + uint64 ip, + uint32 pid, + uint16 misc, + uint64* new_ip, + ParsedEvent::DSOAndOffset* dso_and_offset); + + DISALLOW_COPY_AND_ASSIGN(PerfParser); +}; + +} // namespace quipper + +#endif // QUIPPER_PERF_PARSER_H_ Index: lib/ProfileData/PerfConverter/quipper/perf_parser.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/perf_parser.cc @@ -0,0 +1,595 @@ +//=-- perf_parser.cc --------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "perf_parser.h" + +#include + +#include +#include +#include + +#include "address_mapper.h" +#include "utils.h" + +namespace quipper { + +namespace { + +struct EventAndTime { + ParsedEvent* event; + uint64 time; +}; + +// Returns true if |e1| has an earlier timestamp than |e2|. The args are const +// pointers instead of references because of the way this function is used when +// calling std::stable_sort. +bool CompareParsedEventTimes(const EventAndTime* e1, const EventAndTime* e2) { + return (e1->time < e2->time); +} + +// Name and ID of the kernel swapper process. +const char kSwapperCommandName[] = "swapper"; +const uint32 kSwapperPid = 0; + +enum CallchainContext { + // These entries in a callchain are special cases. + kCallchainInitialContextIndex = 0, // Kernel vs user. + kCallchainBaseAddressIndex = 1, // Same as the base sample address. + // Start reading the callchain from this entry. 
+ kFirstRelevantCallchainIndex, +}; + +bool IsNullBranchStackEntry(const struct branch_entry& entry) { + return (!entry.from && !entry.to); +} + +} // namespace + +PerfParser::PerfParser() {} + +PerfParser::~PerfParser() { + ResetAddressMappers(); +} + +PerfParser::PerfParser(const PerfParser::Options& options) { + options_ = options; +} + +void PerfParser::set_options(const PerfParser::Options& options) { + options_ = options; +} + +bool PerfParser::ParseRawEvents() { + ResetAddressMappers(); + parsed_events_.resize(events_.size()); + for (size_t i = 0; i < events_.size(); ++i) { + ParsedEvent& parsed_event = parsed_events_[i]; + parsed_event.raw_event = &events_[i]; + } + SortParsedEvents(); + ProcessEvents(); + + if (!options_.discard_unused_events) + return true; + + // Some MMAP events' mapped regions will not have any samples. These MMAP + // events should be dropped. |parsed_events_| should be reconstructed without + // these events. + size_t write_index = 0; + size_t read_index; + for (read_index = 0; read_index < parsed_events_.size(); ++read_index) { + const ParsedEvent& event = parsed_events_[read_index]; + if ((*event.raw_event)->header.type == PERF_RECORD_MMAP && + event.num_samples_in_mmap_region == 0) { + continue; + } + if (read_index != write_index) + parsed_events_[write_index] = event; + ++write_index; + } + CHECK_LE(write_index, parsed_events_.size()); + parsed_events_.resize(write_index); + + // Now regenerate the sorted event list again. These are pointers to events + // so they must be regenerated after a resize() of the ParsedEvent vector. + SortParsedEvents(); + + return true; +} + +void PerfParser::SortParsedEvents() { + std::vector events_and_times; + events_and_times.resize(parsed_events_.size()); + for (size_t i = 0; i < parsed_events_.size(); ++i) { + EventAndTime* event_and_time = new EventAndTime; + + // Store the timestamp and event pointer in an array. 
+ event_and_time->event = &parsed_events_[i]; + + struct perf_sample sample_info; + CHECK(ReadPerfSampleInfo(**parsed_events_[i].raw_event, &sample_info)); + event_and_time->time = sample_info.time; + + events_and_times[i] = event_and_time; + } + // Sort the events based on timestamp, and then populate the sorted event + // vector in sorted order. + std::stable_sort(events_and_times.begin(), events_and_times.end(), + CompareParsedEventTimes); + + parsed_events_sorted_by_time_.resize(events_and_times.size()); + for (unsigned int i = 0; i < events_and_times.size(); ++i) { + parsed_events_sorted_by_time_[i] = events_and_times[i]->event; + delete events_and_times[i]; + } +} + +bool PerfParser::ProcessEvents() { + memset(&stats_, 0, sizeof(stats_)); + + stats_.did_remap = false; // Explicitly clear the remap flag. + + // Pid 0 is called the swapper process. Even though perf does not record a + // COMM event for pid 0, we act like we did receive a COMM event for it. Perf + // does this itself, example: + // http://lxr.free-electrons.com/source/tools/perf/util/session.c#L1120 + commands_.insert(kSwapperCommandName); + pidtid_to_comm_map_[std::make_pair(kSwapperPid, kSwapperPid)] = + &(*commands_.find(kSwapperCommandName)); + + for (unsigned int i = 0; i < parsed_events_sorted_by_time_.size(); ++i) { + ParsedEvent& parsed_event = *parsed_events_sorted_by_time_[i]; + event_t& event = *(*parsed_event.raw_event); + switch (event.header.type) { + case PERF_RECORD_SAMPLE: + VLOG(1) << "IP: " << event.ip.ip; + ++stats_.num_sample_events; + + if (MapSampleEvent(&parsed_event)) { + ++stats_.num_sample_events_mapped; + } + break; + case PERF_RECORD_MMAP: { + VLOG(1) << "MMAP: " << event.mmap.filename; + ++stats_.num_mmap_events; + // Use the array index of the current mmap event as a unique identifier. + CHECK(MapMmapEvent(&event.mmap, i)) << "Unable to map MMAP event!"; + // No samples in this MMAP region yet, hopefully. 
+ parsed_event.num_samples_in_mmap_region = 0; + DSOInfo dso_info; + // TODO(sque): Add Build ID as well. + dso_info.name = event.mmap.filename; + dso_set_.insert(dso_info); + break; + } + case PERF_RECORD_FORK: + VLOG(1) << "FORK: " << event.fork.ppid << ":" << event.fork.ptid + << " -> " << event.fork.pid << ":" << event.fork.tid; + ++stats_.num_fork_events; + CHECK(MapForkEvent(event.fork)) << "Unable to map FORK event!"; + break; + case PERF_RECORD_EXIT: + // EXIT events have the same structure as FORK events. + VLOG(1) << "EXIT: " << event.fork.ppid << ":" << event.fork.ptid; + ++stats_.num_exit_events; + break; + case PERF_RECORD_COMM: + VLOG(1) << "COMM: " << event.comm.pid << ":" << event.comm.tid << ": " + << event.comm.comm; + ++stats_.num_comm_events; + CHECK(MapCommEvent(event.comm)); + commands_.insert(event.comm.comm); + pidtid_to_comm_map_[std::make_pair(event.comm.pid, event.comm.tid)] = + &(*commands_.find(event.comm.comm)); + break; + case PERF_RECORD_LOST: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: + case PERF_RECORD_READ: + case PERF_RECORD_MAX: + VLOG(1) << "Parsed event type: " << event.header.type + << ". Doing nothing."; + break; + default: + LOG(ERROR) << "Unknown event type: " << event.header.type; + return false; + } + } + // Print stats collected from parsing. 
+ LOG(INFO) << "Parser processed:" + << " " << stats_.num_mmap_events << " MMAP events" + << ", " << stats_.num_comm_events << " COMM events" + << ", " << stats_.num_fork_events << " FORK events" + << ", " << stats_.num_exit_events << " EXIT events" + << ", " << stats_.num_sample_events << " SAMPLE events" + << ", " << stats_.num_sample_events_mapped + << " of these were mapped"; + + float sample_mapping_percentage = + static_cast(stats_.num_sample_events_mapped) / + stats_.num_sample_events * 100.; + float threshold = options_.sample_mapping_percentage_threshold; + if (sample_mapping_percentage < threshold) { + LOG(ERROR) << "Mapped " << static_cast(sample_mapping_percentage) + << "% of samples, expected at least " + << static_cast(threshold) << "%"; + return false; + } + stats_.did_remap = options_.do_remap; + return true; +} + +bool PerfParser::MapSampleEvent(ParsedEvent* parsed_event) { + bool mapping_failed = false; + + // Find the associated command. + perf_sample sample_info; + if (!ReadPerfSampleInfo(*(*parsed_event->raw_event), &sample_info)) + return false; + PidTid pidtid = std::make_pair(sample_info.pid, sample_info.tid); + std::map::const_iterator comm_iter = + pidtid_to_comm_map_.find(pidtid); + // If there is no command found for this sample, mark it with a NULL command + // pointer. + if (comm_iter != pidtid_to_comm_map_.end()) { + parsed_event->set_command(*comm_iter->second); + } + + struct ip_event& event = (*parsed_event->raw_event)->ip; + uint64 unmapped_event_ip = event.ip; + + // Map the event IP itself. 
+ if (!MapIPAndPidAndGetNameAndOffset(event.ip, + event.pid, + event.header.misc, + reinterpret_cast(&event.ip), + &parsed_event->dso_and_offset)) { + mapping_failed = true; + } + + if (sample_info.callchain && + !MapCallchain(event, unmapped_event_ip, sample_info.callchain, + parsed_event)) { + mapping_failed = true; + } + + if (sample_info.branch_stack && + !MapBranchStack(event, sample_info.branch_stack, parsed_event)) { + mapping_failed = true; + } + + // Write the remapped data back to the raw event regardless of whether it was + // entirely successfully remapped. A single failed remap should not + // invalidate all the other remapped entries. + if (!WritePerfSampleInfo(sample_info, *parsed_event->raw_event)) { + LOG(ERROR) << "Failed to write back remapped sample info."; + return false; + } + + return !mapping_failed; +} + +bool PerfParser::MapCallchain(const struct ip_event& event, + uint64 original_event_addr, + struct ip_callchain* callchain, + ParsedEvent* parsed_event) { + if (!callchain) { + LOG(ERROR) << "NULL call stack data."; + return false; + } + + bool mapping_failed = false; + + // If the callchain's length is 0, there is no work to do. + if (callchain->nr == 0) + return true; + + // The first callchain entry should indicate that this is a kernel vs user + // sample. + uint64 callchain_context = callchain->ips[kCallchainInitialContextIndex]; + switch (callchain_context) { + case PERF_CONTEXT_KERNEL: + CHECK_EQ(event.header.misc, PERF_RECORD_MISC_KERNEL); + break; + case PERF_CONTEXT_USER: + CHECK_EQ(event.header.misc, PERF_RECORD_MISC_USER); + break; + default: + // If the first entry is not a context marker, consider the rest of the + // callchain data invalid and return. + LOG(ERROR) << "Invalid callchain context: " + << callchain_context; + return false; + } + + // Return if we only have the context and nothing else. 
+ if (callchain->nr == kCallchainBaseAddressIndex) + return true; + + // The second callchain entry is the same as the sample address. + if (callchain->ips[kCallchainBaseAddressIndex] != + original_event_addr) { + LOG(ERROR) << "Second callchain entry: " + << callchain->ips[kCallchainBaseAddressIndex] + << " doesn't match sample address: " + << original_event_addr; + return false; + } + // The sample address has already been mapped so no need to map this one. + callchain->ips[kCallchainBaseAddressIndex] = event.ip; + + // Keeps track of whether the current entry is kernel or user. + uint16 current_misc = event.header.misc; + parsed_event->callchain.resize(callchain->nr); + int num_entries_mapped = 0; + for (unsigned int j = kFirstRelevantCallchainIndex; j < callchain->nr; ++j) { + uint64 entry = callchain->ips[j]; + // When a callchain context entry is found, do not attempt to symbolize + // it. Instead use it to update |current_misc|. + switch (entry) { + case PERF_CONTEXT_KERNEL: + current_misc = PERF_RECORD_MISC_KERNEL; + continue; + case PERF_CONTEXT_USER: + current_misc = PERF_RECORD_MISC_USER; + continue; + default: + if (!MapIPAndPidAndGetNameAndOffset( + entry, + event.pid, + current_misc, + reinterpret_cast(&callchain->ips[j]), + &parsed_event->callchain[num_entries_mapped++])) { + mapping_failed = true; + } + break; + } + } + // Not all the entries were mapped. Trim |parsed_event->callchain| to + // remove unused entries at the end. + parsed_event->callchain.resize(num_entries_mapped); + + return !mapping_failed; +} + +bool PerfParser::MapBranchStack(const struct ip_event& event, + struct branch_stack* branch_stack, + ParsedEvent* parsed_event) { + if (!branch_stack) { + LOG(ERROR) << "NULL branch stack data."; + return false; + } + + // First, trim the branch stack to remove trailing null entries. + size_t trimmed_size = 0; + for (size_t i = 0; i < branch_stack->nr; ++i) { + // Count the number of non-null entries before the first null entry. 
+    if (IsNullBranchStackEntry(branch_stack->entries[i])) {
+      break;
+    }
+    ++trimmed_size;
+  }
+
+  // If a null entry was found, make sure all subsequent entries are null
+  // as well.
+  for (size_t i = trimmed_size; i < branch_stack->nr; ++i) {
+    const struct branch_entry& entry = branch_stack->entries[i];
+    if (!IsNullBranchStackEntry(entry)) {
+      LOG(ERROR) << "Non-null branch stack entry found after null entry: "
+                 << reinterpret_cast(entry.from) << " -> "
+                 << reinterpret_cast(entry.to);
+      return false;
+    }
+  }
+
+  // Map branch stack addresses.
+  parsed_event->branch_stack.resize(trimmed_size);
+  for (unsigned int i = 0; i < trimmed_size; ++i) {
+    struct branch_entry& entry = branch_stack->entries[i];
+    ParsedEvent::BranchEntry& parsed_entry = parsed_event->branch_stack[i];
+    if (!MapIPAndPidAndGetNameAndOffset(entry.from,
+                                        event.pid,
+                                        event.header.misc,
+                                        reinterpret_cast(
+                                            &entry.from),
+                                        &parsed_entry.from)) {
+      return false;
+    }
+    if (!MapIPAndPidAndGetNameAndOffset(entry.to,
+                                        event.pid,
+                                        event.header.misc,
+                                        reinterpret_cast(&entry.to),
+                                        &parsed_entry.to)) {
+      return false;
+    }
+    parsed_entry.predicted = entry.flags.predicted;
+    // Either predicted or mispredicted, not both. But don't use a CHECK here,
+    // just exit gracefully because it's a minor issue.
+    if (entry.flags.predicted == entry.flags.mispred) {
+      LOG(ERROR) << "Branch stack entry predicted and mispred flags "
+                 << "both have value " << entry.flags.mispred;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool PerfParser::MapIPAndPidAndGetNameAndOffset(
+    uint64 ip,
+    uint32 pid,
+    uint16 misc,
+    uint64* new_ip,
+    ParsedEvent::DSOAndOffset* dso_and_offset) {
+  // Attempt to find the synthetic address of the IP sample in this order:
+  // 1. Address space of the kernel.
+  // 2. Address space of its own process.
+  // 3. Address space of the parent process.
+ + AddressMapper* mapper = NULL; + uint64 mapped_addr = 0; + + // Sometimes the first event we see is a SAMPLE event and we don't have the + // time to create an address mapper for a process. Example, for pid 0. + if (process_mappers_.find(pid) == process_mappers_.end()) { + CreateProcessMapper(pid); + } + mapper = process_mappers_[pid]; + bool mapped = mapper->GetMappedAddress(ip, &mapped_addr); + // TODO(asharif): What should we do when we cannot map a SAMPLE event? + + if (mapped) { + if (dso_and_offset) { + uint64 id = kuint64max; + CHECK(mapper->GetMappedIDAndOffset(ip, &id, &dso_and_offset->offset_)); + // Make sure the ID points to a valid event. + CHECK_LE(id, parsed_events_sorted_by_time_.size()); + ParsedEvent* parsed_event = parsed_events_sorted_by_time_[id]; + CHECK_EQ((*parsed_event->raw_event)->header.type, PERF_RECORD_MMAP); + + // Find the mmap DSO filename in the set of known DSO names. + // TODO(sque): take build IDs into account. + DSOInfo dso_info; + dso_info.name = (*parsed_event->raw_event)->mmap.filename; + std::set::const_iterator dso_iter = dso_set_.find(dso_info); + CHECK(dso_iter != dso_set_.end()); + dso_and_offset->dso_info_ = &(*dso_iter); + if (id) { + // For non-kernel events, we need to preserve the pgoff. + // TODO(cwp-team): Add unit test for this case. + dso_and_offset->offset_ += (*parsed_event->raw_event)->mmap.pgoff; + } + + ++parsed_event->num_samples_in_mmap_region; + } + if (options_.do_remap) + *new_ip = mapped_addr; + } + return mapped; +} + +bool PerfParser::MapMmapEvent(struct mmap_event* event, uint64 id) { + // We need to hide only the real kernel addresses. However, to make things + // more secure, and make the mapping idempotent, we should remap all + // addresses, both kernel and non-kernel. 
+
+  AddressMapper* mapper = NULL;
+
+  uint32 pid = event->pid;
+  if (process_mappers_.find(pid) == process_mappers_.end()) {
+    CreateProcessMapper(pid);
+  }
+  mapper = process_mappers_[pid];
+
+  uint64 len = event->len;
+  uint64 start = event->start;
+  uint64 pgoff = event->pgoff;
+
+  // |id| == 0 corresponds to the kernel mmap. We have several cases here:
+  //
+  // For ARM and x86, in sudo mode, pgoff == start, example:
+  // start=0x80008200
+  // pgoff=0x80008200
+  // len =0xfffffff7ff7dff
+  //
+  // For x86-64, in sudo mode, pgoff is between start and start + len. SAMPLE
+  // events lie between pgoff and pgoff + length of the real kernel binary,
+  // example:
+  // start=0x3bc00000
+  // pgoff=0xffffffffbcc00198
+  // len =0xffffffff843fffff
+  // SAMPLE events will be found after pgoff. For kernels with ASLR, pgoff will
+  // be something only visible to the root user, and will be randomized at
+  // startup. With |remap| set to true, we should hide pgoff in this case. So we
+  // normalize all SAMPLE events relative to pgoff.
+  //
+  // For non-sudo mode, the kernel will be mapped from 0 to the pointer limit,
+  // example:
+  // start=0x0
+  // pgoff=0x0
+  // len =0xffffffff
+  if (id == 0) {
+    // If pgoff is between start and len, we normalize the event by setting
+    // start to be pgoff just like how it is for ARM and x86. We also set len to
+    // be a much smaller number (closer to the real length of the kernel binary)
+    // because SAMPLEs are actually only seen between |event->pgoff| and
+    // |event->pgoff + kernel text size|.
+    if (pgoff > start && pgoff < start + len) {
+      len = len + start - pgoff;
+      start = pgoff;
+    }
+    // For kernels with ASLR, pgoff is critical information that should not be
+    // revealed when |remap| is true.
+ pgoff = 0; + } + + if (!mapper->MapWithID(start, len, id, true)) { + mapper->DumpToLog(); + return false; + } + + uint64 mapped_addr; + CHECK(mapper->GetMappedAddress(start, &mapped_addr)); + if (options_.do_remap) { + event->start = mapped_addr; + event->len = len; + event->pgoff = pgoff; + } + return true; +} + +void PerfParser::CreateProcessMapper(uint32 pid, uint32 ppid) { + AddressMapper* mapper; + if (process_mappers_.find(ppid) != process_mappers_.end()) + mapper = new AddressMapper(*process_mappers_[ppid]); + else + mapper = new AddressMapper(); + + process_mappers_[pid] = mapper; +} + +bool PerfParser::MapCommEvent(const struct comm_event& event) { + uint32 pid = event.pid; + if (process_mappers_.find(pid) == process_mappers_.end()) { + CreateProcessMapper(pid); + } + return true; +} + +bool PerfParser::MapForkEvent(const struct fork_event& event) { + PidTid parent = std::make_pair(event.ppid, event.ptid); + PidTid child = std::make_pair(event.pid, event.tid); + if (parent != child && + pidtid_to_comm_map_.find(parent) != pidtid_to_comm_map_.end()) { + pidtid_to_comm_map_[child] = pidtid_to_comm_map_[parent]; + } + + uint32 pid = event.pid; + if (process_mappers_.find(pid) != process_mappers_.end()) { + DLOG(INFO) << "Found an existing process mapper with pid: " << pid; + return true; + } + + // If the parent and child pids are the same, this is just a new thread + // within the same process, so don't do anything. 
+ if (event.ppid == pid) + return true; + + CreateProcessMapper(pid, event.ppid); + return true; +} + +void PerfParser::ResetAddressMappers() { + std::map::iterator iter; + for (iter = process_mappers_.begin(); iter != process_mappers_.end(); ++iter) + delete iter->second; + process_mappers_.clear(); +} + +} // namespace quipper Index: lib/ProfileData/PerfConverter/quipper/perf_reader.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/perf_reader.h @@ -0,0 +1,285 @@ +//=-- perf_reader.h ---------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef QUIPPER_PERF_READER_H_ +#define QUIPPER_PERF_READER_H_ + +#include +#include +#include +#include + +#include "llvm_port.h" + +#include "quipper_string.h" +#include "kernel/perf_internals.h" + +namespace quipper { + +struct PerfFileAttr { + struct perf_event_attr attr; + std::vector ids; +}; + +// Based on code in tools/perf/util/header.c, the metadata are of the following +// formats: + +// Based on kernel/perf_internals.h +const size_t kBuildIDArraySize = 20; +const size_t kBuildIDStringLength = kBuildIDArraySize * 2; + +struct CStringWithLength { + u32 len; + string str; +}; + +struct PerfStringMetadata { + u32 type; + std::vector data; +}; + +struct PerfUint32Metadata { + u32 type; + std::vector data; +}; + +struct PerfUint64Metadata { + u32 type; + std::vector data; +}; + +typedef u32 num_siblings_type; + +struct PerfCPUTopologyMetadata { + std::vector core_siblings; + std::vector thread_siblings; +}; + +struct PerfNodeTopologyMetadata { + u32 id; + u64 total_memory; + u64 free_memory; + CStringWithLength cpu_list; +}; + +struct BufferWithSize; +struct ConstBufferWithSize; + +class PerfReader { 
+ public: + PerfReader() : sample_type_(0), + read_format_(0), + is_cross_endian_(0) {} + ~PerfReader(); + + // Makes |build_id| fit the perf format, by either truncating it or adding + // zeros to the end so that it has length kBuildIDStringLength. + static void PerfizeBuildIDString(string* build_id); + + // Changes |build_id| to the best guess of what the build id was before going + // through perf. Specifically, it keeps removing trailing sequences of four + // zero bytes (or eight '0' characters) until there are no more such + // sequences, or the build id would be empty if the process were repeated. + static void UnperfizeBuildIDString(string* build_id); + + bool ReadFile(const string& filename); + bool ReadFromVector(const std::vector& data); + bool ReadFromString(const string& str); + bool ReadFromPointer(const char* perf_data, size_t size); + + // TODO(rohinmshah): GetSize should not use RegenerateHeader (so that it can + // be const). Ideally, RegenerateHeader would be deleted and instead of + // having out_header_ as an instance variable, it would be computed + // dynamically whenever needed. + + // Returns the size in bytes that would be written by any of the methods that + // write the entire perf data file (WriteFile, WriteToPointer, etc). + size_t GetSize(); + + bool WriteFile(const string& filename); + bool WriteToVector(std::vector* data); + bool WriteToString(string* str); + bool WriteToPointer(char* buffer, size_t size); + + bool RegenerateHeader(); + + // Stores the mapping from filenames to build ids in build_id_events_. + // Returns true on success. + // Note: If |filenames_to_build_ids| contains a mapping for a filename for + // which there is already a build_id_event in build_id_events_, a duplicate + // build_id_event will be created, and the old build_id_event will NOT be + // deleted. 
+ bool InjectBuildIDs(const std::map& filenames_to_build_ids); + + // Replaces existing filenames with filenames from |build_ids_to_filenames| + // by joining on build ids. If a build id in |build_ids_to_filenames| is not + // present in this parser, it is ignored. + bool Localize(const std::map& build_ids_to_filenames); + + // Same as Localize, but joins on filenames instead of build ids. + bool LocalizeUsingFilenames(const std::map& filename_map); + + // Stores a list of unique filenames found in MMAP events into + // |filenames|. Any existing data in |filenames| will be lost. + void GetFilenames(std::vector* filenames) const; + void GetFilenamesAsSet(std::set* filenames) const; + + // Uses build id events to populate |filenames_to_build_ids|. + // Any existing data in |filenames_to_build_ids| will be lost. + // Note: A filename returned by GetFilenames need not be present in this map, + // since there may be no build id event corresponding to the MMAP. + void GetFilenamesToBuildIDs( + std::map* filenames_to_build_ids) const; + + static bool IsSupportedEventType(uint32 type); + + // If a program using PerfReader calls events(), it could work with the + // resulting events by importing kernel/perf_internals.h. This would also + // apply to other forms of data (attributes, event types, build ids, etc.) + // However, there is no easy way to work with the sample info within events. + // The following two methods have been added for this purpose. + + // Extracts from a perf event |event| info about the perf sample that + // contains the event. Stores info in |sample|. + bool ReadPerfSampleInfo(const event_t& event, + struct perf_sample* sample) const; + // Writes |sample| info back to a perf event |event|. + bool WritePerfSampleInfo(const perf_sample& sample, + event_t* event) const; + + // Accessor funcs. 
+ const std::vector& attrs() const { + return attrs_; + } + + const std::vector& events() const { + return events_; + } + + const std::vector& event_types() const { + return event_types_; + } + + const std::vector& build_id_events() const { + return build_id_events_; + } + + protected: + bool ReadHeader(const ConstBufferWithSize& data); + + bool ReadAttrs(const ConstBufferWithSize& data); + bool ReadAttr(const ConstBufferWithSize& data, size_t* offset); + bool ReadEventAttr(const ConstBufferWithSize& data, size_t* offset, + perf_event_attr* attr); + bool ReadUniqueIDs(const ConstBufferWithSize& data, size_t num_ids, + size_t* offset, std::vector* ids); + + bool ReadEventTypes(const ConstBufferWithSize& data); + bool ReadEventType(const ConstBufferWithSize& data, size_t* offset); + + bool ReadData(const ConstBufferWithSize& data); + + // Reads metadata in normal mode. + bool ReadMetadata(const ConstBufferWithSize& data); + bool ReadBuildIDMetadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size); + bool ReadStringMetadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size); + bool ReadUint32Metadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size); + bool ReadUint64Metadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size); + bool ReadCPUTopologyMetadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size); + bool ReadNUMATopologyMetadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size); + + // Read perf data from piped perf output data. + bool ReadPipedData(const ConstBufferWithSize& data); + + // Like WriteToPointer, but does not check if the buffer is large enough. 
+ bool WriteToPointerWithoutCheckingSize(char* buffer, size_t size); + + bool WriteHeader(const BufferWithSize& data) const; + bool WriteAttrs(const BufferWithSize& data) const; + bool WriteEventTypes(const BufferWithSize& data) const; + bool WriteData(const BufferWithSize& data) const; + bool WriteMetadata(const BufferWithSize& data) const; + + // For writing the various types of metadata. + bool WriteBuildIDMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const; + bool WriteStringMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const; + bool WriteUint32Metadata(u32 type, size_t* offset, + const BufferWithSize& data) const; + bool WriteUint64Metadata(u32 type, size_t* offset, + const BufferWithSize& data) const; + bool WriteEventDescMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const; + bool WriteCPUTopologyMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const; + bool WriteNUMATopologyMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const; + + // For reading event blocks within piped perf data. + bool ReadAttrEventBlock(const ConstBufferWithSize& data, size_t offset, + size_t size); + bool ReadPerfEventBlock(const event_t& event); + + // Returns the number of types of metadata stored. + size_t GetNumMetadata() const; + + // For computing the sizes of the various types of metadata. + size_t GetBuildIDMetadataSize() const; + size_t GetStringMetadataSize() const; + size_t GetUint32MetadataSize() const; + size_t GetUint64MetadataSize() const; + size_t GetEventDescMetadataSize() const; + size_t GetCPUTopologyMetadataSize() const; + size_t GetNUMATopologyMetadataSize() const; + + // Returns true if we should write the number of strings for the string + // metadata of type |type|. + bool NeedsNumberOfStringData(u32 type) const; + + // Replaces existing filenames in MMAP events based on |filename_map|. + // This method does not change |build_id_events_|. 
+ bool LocalizeMMapFilenames(const std::map& filename_map); + + std::vector attrs_; + std::vector event_types_; + std::vector events_; + std::vector build_id_events_; + std::vector string_metadata_; + std::vector uint32_metadata_; + std::vector uint64_metadata_; + PerfCPUTopologyMetadata cpu_topology_; + std::vector numa_topology_; + uint64 sample_type_; + uint64 read_format_; + uint64 metadata_mask_; + + // Indicates that the perf data being read is from machine with a different + // endianness than the current machine. + bool is_cross_endian_; + + private: + // The file header is either a normal header or a piped header. + union { + struct perf_file_header header_; + struct perf_pipe_file_header piped_header_; + }; + struct perf_file_header out_header_; + + DISALLOW_COPY_AND_ASSIGN(PerfReader); +}; + +} // namespace quipper + +#endif // QUIPPER_PERF_READER_H_ Index: lib/ProfileData/PerfConverter/quipper/perf_reader.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/perf_reader.cc @@ -0,0 +1,2106 @@ +//=-- perf_reader.cc --------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include +#include + +#include +#include +#include +#include +#include + +#include "perf_reader.h" +#include "quipper_string.h" +#include "utils.h" + +namespace quipper { + +struct BufferWithSize { + char* ptr; + size_t size; +}; + +// If the buffer is read-only, it is not sufficient to mark the previous struct +// as const, as this only means that the pointer cannot be changed, and says +// nothing about the contents of the buffer. So, we need another struct. 
+struct ConstBufferWithSize { + const char* ptr; + size_t size; +}; + +namespace { + +// The type of the number of string data, found in the command line metadata in +// the perf data file. +typedef u32 num_string_data_type; + +// Types of the event desc fields that are not found in other structs. +typedef u32 event_desc_num_events; +typedef u32 event_desc_attr_size; +typedef u32 event_desc_num_unique_ids; + +// The type of the number of nodes field in NUMA topology. +typedef u32 numa_topology_num_nodes_type; + +// The first 64 bits of the perf header, used as a perf data file ID tag. +const uint64 kPerfMagic = 0x32454c4946524550LL; + +// A mask that is applied to metadata_mask_ in order to get a mask for +// only the metadata supported by quipper. +// Currently, we support build ids, hostname, osrelease, version, arch, nrcpus, +// cpudesc, cpuid, totalmem, cmdline, eventdesc, cputopology, numatopology, and +// branchstack. +// The mask is computed as (1 << HEADER_BUILD_ID) | +// (1 << HEADER_HOSTNAME) | ... | (1 << HEADER_BRANCH_STACK) +const uint32 kSupportedMetadataMask = 0xfffc; + +// By default, the build ID event has PID = -1. +const uint32 kDefaultBuildIDEventPid = static_cast(-1); + +// Eight bits in a byte. +size_t BytesToBits(size_t num_bytes) { + return num_bytes * 8; +} + +template +void ByteSwap(T* input) { + switch (sizeof(T)) { + case sizeof(uint8): + LOG(WARNING) << "Attempting to byte swap on a single byte."; + break; + case sizeof(uint16): + *input = bswap_16(*input); + break; + case sizeof(uint32): + *input = bswap_32(*input); + break; + case sizeof(uint64): + *input = bswap_64(*input); + break; + default: + LOG(FATAL) << "Invalid size for byte swap: " << sizeof(T) << " bytes"; + break; + } +} + +// The code currently assumes that the compiler will not add any padding to the +// various structs. These CHECKs make sure that this is true. 
+void CheckNoEventHeaderPadding() { + perf_event_header header; + CHECK_EQ(sizeof(header), + sizeof(header.type) + sizeof(header.misc) + sizeof(header.size)); +} + +void CheckNoPerfEventAttrPadding() { + perf_event_attr attr; + CHECK_EQ(sizeof(attr), + (reinterpret_cast(&attr.branch_sample_type) - + reinterpret_cast(&attr)) + + sizeof(attr.branch_sample_type)); +} + +void CheckNoEventTypePadding() { + perf_trace_event_type event_type; + CHECK_EQ(sizeof(event_type), + sizeof(event_type.event_id) + sizeof(event_type.name)); +} + +void CheckNoBuildIDEventPadding() { + build_id_event event; + CHECK_EQ(sizeof(event), + sizeof(event.header.type) + sizeof(event.header.misc) + + sizeof(event.header.size) + sizeof(event.pid) + + sizeof(event.build_id)); +} + +// Creates/updates a build id event with |build_id| and |filename|. +// Passing "" to |build_id| or |filename| will leave the corresponding field +// unchanged (in which case |event| must be non-null). +// If |event| is null or is not large enough, a new event will be created. +// In this case, if |event| is non-null, it will be freed. +// Otherwise, updates the fields of the existing event. +// |new_misc| indicates kernel vs user space, and is only used to fill in the +// |header.misc| field of new events. +// In either case, returns a pointer to the event containing the updated data, +// or NULL in the case of a failure. +build_id_event* CreateOrUpdateBuildID(const string& build_id, + const string& filename, + uint16 new_misc, + build_id_event* event) { + // When creating an event from scratch, build id and filename must be present. + if (!event && (build_id.empty() || filename.empty())) + return NULL; + size_t new_len = GetUint64AlignedStringLength( + filename.empty() ? event->filename : filename); + + // If event is null, or we don't have enough memory, allocate more memory, and + // switch the new pointer with the existing pointer. 
+ size_t new_size = sizeof(*event) + new_len; + if (!event || new_size > event->header.size) { + build_id_event* new_event = CallocMemoryForBuildID(new_size); + + if (event) { + // Copy over everything except the filename and free the event. + // It is guaranteed that we are changing the filename - otherwise, the old + // size and the new size would be equal. + *new_event = *event; + free(event); + } else { + // Fill in the fields appropriately. + new_event->header.type = HEADER_BUILD_ID; + new_event->header.misc = new_misc; + new_event->pid = kDefaultBuildIDEventPid; + } + event = new_event; + } + + // Here, event is the pointer to the build_id_event that we are keeping. + // Update the event's size, build id, and filename. + if (!build_id.empty() && + !StringToHex(build_id, event->build_id, arraysize(event->build_id))) { + free(event); + return NULL; + } + + if (!filename.empty()) + CHECK_GT(snprintf(event->filename, new_len, "%s", filename.c_str()), 0); + + event->header.size = new_size; + return event; +} + +// Reads |size| bytes from |buffer| into |dest| and advances |src_offset|. +bool ReadDataFromBuffer(const ConstBufferWithSize& buffer, + size_t size, + const string& value_name, + size_t* src_offset, + void* dest) { + size_t end_offset = *src_offset + size / sizeof(*buffer.ptr); + if (buffer.size < end_offset) { + LOG(ERROR) << "Not enough bytes to read " << value_name; + return false; + } + memcpy(dest, buffer.ptr + *src_offset, size); + *src_offset = end_offset; + return true; +} + +// Reads |size| bytes from |data| into |buffer| and advances |buffer_offset|. 
+bool WriteDataToBuffer(const void* data, + size_t size, + const string& value_name, + size_t* buffer_offset, + const BufferWithSize& buffer) { + size_t end_offset = *buffer_offset + size / sizeof(*buffer.ptr); + if (buffer.size < end_offset) { + LOG(ERROR) << "No space in buffer to write " << value_name; + return false; + } + memcpy(buffer.ptr + *buffer_offset, data, size); + *buffer_offset = end_offset; + return true; +} + +// Reads a CStringWithLength from |buffer| into |dest|, and advances the offset. +bool ReadStringFromBuffer(const ConstBufferWithSize& buffer, + bool is_cross_endian, + size_t* offset, + CStringWithLength* dest) { + if (!ReadDataFromBuffer(buffer, sizeof(dest->len), "string length", + offset, &dest->len)) { + return false; + } + if (is_cross_endian) + ByteSwap(&dest->len); + + if (buffer.size < *offset + dest->len) { + LOG(ERROR) << "Not enough bytes to read string"; + return false; + } + dest->str = string(buffer.ptr + *offset); + *offset += dest->len / sizeof(*buffer.ptr); + return true; +} + +// Writes a CStringWithLength from |src| to |buffer|, and advances the offset. +bool WriteStringToBuffer(const CStringWithLength& src, + const BufferWithSize& buffer, + size_t* offset) { + const size_t kDestUnitSize = sizeof(*buffer.ptr); + size_t final_offset = *offset + src.len + sizeof(src.len) / kDestUnitSize; + if (buffer.size < final_offset) { + LOG(ERROR) << "Not enough space to write string"; + return false; + } + + if (!WriteDataToBuffer(&src.len, sizeof(src.len), + "length of string metadata", offset, buffer)) { + return false; + } + + memset(buffer.ptr + *offset, 0, src.len * kDestUnitSize); + CHECK_GT(snprintf(buffer.ptr + *offset, src.len, "%s", src.str.c_str()), 0); + *offset += src.len; + return true; +} + +// Read read info from perf data. Corresponds to sample format type +// PERF_SAMPLE_READ. 
+const uint64* ReadReadInfo(const uint64* array, + bool swap_bytes, + uint64 read_format, + struct perf_sample* sample) { + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + sample->read.time_enabled = *array++; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + sample->read.time_running = *array++; + if (read_format & PERF_FORMAT_ID) + sample->read.id = *array++; + + if (swap_bytes) { + ByteSwap(&sample->read.time_enabled); + ByteSwap(&sample->read.time_running); + ByteSwap(&sample->read.id); + } + + return array; +} + +// Read call chain info from perf data. Corresponds to sample format type +// PERF_SAMPLE_CALLCHAIN. +const uint64* ReadCallchain(const uint64* array, + bool swap_bytes, + struct perf_sample* sample) { + // Make sure there is no existing allocated memory in |sample->callchain|. + CHECK_EQ(static_cast(NULL), sample->callchain); + + // The callgraph data consists of a uint64 value |nr| followed by |nr| + // addresses. + uint64 callchain_size = *array++; + if (swap_bytes) + ByteSwap(&callchain_size); + struct ip_callchain* callchain = + reinterpret_cast(new uint64[callchain_size + 1]); + callchain->nr = callchain_size; + for (size_t i = 0; i < callchain_size; ++i) { + callchain->ips[i] = *array++; + if (swap_bytes) + ByteSwap(&callchain->ips[i]); + } + sample->callchain = callchain; + + return array; +} + +// Read raw info from perf data. Corresponds to sample format type +// PERF_SAMPLE_RAW. +const uint64* ReadRawData(const uint64* array, + bool swap_bytes, + struct perf_sample* sample) { + // First read the size. + const uint32* ptr = reinterpret_cast(array); + sample->raw_size = *ptr++; + if (swap_bytes) + ByteSwap(&sample->raw_size); + + // Allocate space for and read the raw data bytes. + sample->raw_data = new uint8[sample->raw_size]; + memcpy(sample->raw_data, ptr, sample->raw_size); + + // Determine the bytes that were read, and align to the next 64 bits. 
+ int bytes_read = AlignSize(sizeof(sample->raw_size) + sample->raw_size, + sizeof(uint64)); + array += bytes_read / sizeof(uint64); + + return array; +} + +// Read call chain info from perf data. Corresponds to sample format type +// PERF_SAMPLE_CALLCHAIN. +const uint64* ReadBranchStack(const uint64* array, + bool swap_bytes, + struct perf_sample* sample) { + // Make sure there is no existing allocated memory in + // |sample->branch_stack|. + CHECK_EQ(static_cast(NULL), sample->branch_stack); + + // The branch stack data consists of a uint64 value |nr| followed by |nr| + // branch_entry structs. + uint64 branch_stack_size = *array++; + if (swap_bytes) + ByteSwap(&branch_stack_size); + struct branch_stack* branch_stack = + reinterpret_cast( + new uint8[sizeof(uint64) + + branch_stack_size * sizeof(struct branch_entry)]); + branch_stack->nr = branch_stack_size; + for (size_t i = 0; i < branch_stack_size; ++i) { + memcpy(&branch_stack->entries[i], array, sizeof(struct branch_entry)); + array += sizeof(struct branch_entry) / sizeof(*array); + if (swap_bytes) { + ByteSwap(&branch_stack->entries[i].from); + ByteSwap(&branch_stack->entries[i].to); + } + } + sample->branch_stack = branch_stack; + + return array; +} + +size_t ReadPerfSampleFromData(const uint64* array, + const uint64 sample_fields, + const uint64 read_format, + bool swap_bytes, + struct perf_sample* sample) { + const uint64* initial_array_ptr = array; + const uint64 k32BitFields = PERF_SAMPLE_TID | PERF_SAMPLE_CPU; + bool read_read_info = false; + bool read_raw_data = false; + bool read_callchain = false; + bool read_branch_stack = false; + + for (int index = 0; (sample_fields >> index) > 0; ++index) { + uint64 sample_type = (1 << index); + union { + uint32 val32[sizeof(uint64) / sizeof(uint32)]; + uint64 val64; + }; + if (!(sample_type & sample_fields)) + continue; + + val64 = *array; + + if (swap_bytes) { + if (k32BitFields & sample_type) { + ByteSwap(&val32[0]); + ByteSwap(&val32[1]); + } else { + 
ByteSwap(&val64); + } + } + + switch (sample_type) { + case PERF_SAMPLE_IP: + sample->ip = val64; + break; + case PERF_SAMPLE_TID: + sample->pid = val32[0]; + sample->tid = val32[1]; + break; + case PERF_SAMPLE_TIME: + sample->time = val64; + break; + case PERF_SAMPLE_ADDR: + sample->addr = val64; + break; + case PERF_SAMPLE_ID: + sample->id = val64; + break; + case PERF_SAMPLE_STREAM_ID: + sample->stream_id = val64; + break; + case PERF_SAMPLE_CPU: + sample->cpu = val32[0]; + break; + case PERF_SAMPLE_PERIOD: + sample->period = val64; + break; + case PERF_SAMPLE_READ: + read_read_info = true; + break; + case PERF_SAMPLE_RAW: + read_raw_data = true; + break; + case PERF_SAMPLE_CALLCHAIN: + read_callchain = true; + break; + case PERF_SAMPLE_BRANCH_STACK: + read_branch_stack = true; + break; + default: + LOG(FATAL) << "Invalid sample type " << sample_type; + break; + } + + switch (sample_type) { + case PERF_SAMPLE_IP: + case PERF_SAMPLE_TID: + case PERF_SAMPLE_TIME: + case PERF_SAMPLE_ADDR: + case PERF_SAMPLE_ID: + case PERF_SAMPLE_STREAM_ID: + case PERF_SAMPLE_CPU: + case PERF_SAMPLE_PERIOD: + ++array; + break; + case PERF_SAMPLE_READ: + case PERF_SAMPLE_RAW: + case PERF_SAMPLE_CALLCHAIN: + case PERF_SAMPLE_BRANCH_STACK: + // Read info, raw info, call chain, and branch stack are special cases. + // They come after the other fields in the sample info data, regardless of + // the order of |sample_type| bits. So do not increment the data pointer. + break; + default: + LOG(FATAL) << "Invalid sample type " << sample_type; + } + } + + // Read each of the complex sample info fields. + if (read_read_info) { + // TODO(cwp-team): support grouped read info. 
+ if (read_format & PERF_FORMAT_GROUP) + return 0; + array = ReadReadInfo(array, swap_bytes, read_format, sample); + } + if (read_callchain) { + array = ReadCallchain(array, swap_bytes, sample); + } + if (read_raw_data) { + array = ReadRawData(array, swap_bytes, sample); + } + if (read_branch_stack) { + array = ReadBranchStack(array, swap_bytes, sample); + } + + return (array - initial_array_ptr) * sizeof(uint64); +} + +size_t WritePerfSampleToData(const struct perf_sample& sample, + const uint64 sample_fields, + const uint64 read_format, + uint64* array) { + uint64* initial_array_ptr = array; + bool write_read_info = false; + bool write_raw_data = false; + bool write_callchain = false; + bool write_branch_stack = false; + + for (int index = 0; (sample_fields >> index) > 0; ++index) { + uint64 sample_type = (1 << index); + union { + uint32 val32[sizeof(uint64) / sizeof(uint32)]; + uint64 val64; + }; + if (!(sample_type & sample_fields)) + continue; + + switch (sample_type) { + case PERF_SAMPLE_IP: + val64 = sample.ip; + break; + case PERF_SAMPLE_TID: + val32[0] = sample.pid; + val32[1] = sample.tid; + break; + case PERF_SAMPLE_TIME: + val64 = sample.time; + break; + case PERF_SAMPLE_ADDR: + val64 = sample.addr; + break; + case PERF_SAMPLE_ID: + val64 = sample.id; + break; + case PERF_SAMPLE_STREAM_ID: + val64 = sample.stream_id; + break; + case PERF_SAMPLE_CPU: + val64 = sample.cpu; + break; + case PERF_SAMPLE_PERIOD: + val64 = sample.period; + break; + case PERF_SAMPLE_READ: + write_read_info = true; + continue; + case PERF_SAMPLE_RAW: + write_raw_data = true; + continue; + case PERF_SAMPLE_CALLCHAIN: + write_callchain = true; + continue; + case PERF_SAMPLE_BRANCH_STACK: + write_branch_stack = true; + continue; + default: + LOG(FATAL) << "Invalid sample type " << sample_type; + } + *array++ = val64; + } + + if (write_read_info) { + // TODO(cwp-team): support grouped read info. 
+ if (read_format & PERF_FORMAT_GROUP) + return 0; + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + *array++ = sample.read.time_enabled; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + *array++ = sample.read.time_running; + if (read_format & PERF_FORMAT_ID) + *array++ = sample.read.id; + } + + if (write_callchain) { + *array++ = sample.callchain->nr; + for (size_t i = 0; i < sample.callchain->nr; ++i) + *array++ = sample.callchain->ips[i]; + } + + if (write_raw_data) { + uint32* ptr = reinterpret_cast(array); + *ptr++ = sample.raw_size; + memcpy(ptr, sample.raw_data, sample.raw_size); + + // Update the data read pointer after aligning to the next 64 bytes. + int num_bytes = AlignSize(sizeof(sample.raw_size) + sample.raw_size, + sizeof(uint64)); + array += num_bytes / sizeof(uint64); + } + + if (write_branch_stack) { + *array++ = sample.branch_stack->nr; + for (size_t i = 0; i < sample.branch_stack->nr; ++i) { + *array++ = sample.branch_stack->entries[i].from; + *array++ = sample.branch_stack->entries[i].to; + memcpy(array++, &sample.branch_stack->entries[i].flags, sizeof(uint64)); + } + } + return (array - initial_array_ptr) * sizeof(uint64); +} + +} // namespace + +PerfReader::~PerfReader() { + // Free allocated memory. + for (size_t i = 0; i < events_.size(); ++i) + if (events_[i]) + free(events_[i]); + + for (size_t i = 0; i < build_id_events_.size(); ++i) + if (build_id_events_[i]) + free(build_id_events_[i]); +} + +void PerfReader::PerfizeBuildIDString(string* build_id) { + build_id->resize(kBuildIDStringLength, '0'); +} + +void PerfReader::UnperfizeBuildIDString(string* build_id) { + const size_t kPaddingSize = 8; + const string kBuildIDPadding = string(kPaddingSize, '0'); + + // Remove kBuildIDPadding from the end of build_id until we cannot remove any + // more, or removing more would cause the build id to be empty. 
+ while (build_id->size() > kPaddingSize && + build_id->substr(build_id->size() - kPaddingSize) == kBuildIDPadding) { + build_id->resize(build_id->size() - kPaddingSize); + } +} + +bool PerfReader::ReadFile(const string& filename) { + std::vector data; + if (!ReadFileToData(filename, &data)) + return false; + return ReadFromVector(data); +} + +bool PerfReader::ReadFromVector(const std::vector& data) { + return ReadFromPointer(&data[0], data.size()); +} + +bool PerfReader::ReadFromString(const string& str) { + return ReadFromPointer(str.c_str(), str.size()); +} + +bool PerfReader::ReadFromPointer(const char* perf_data, size_t size) { + const ConstBufferWithSize data = { perf_data, size }; + + if (data.size == 0) + return false; + if (!ReadHeader(data)) + return false; + + // Check if it is normal perf data. + if (header_.size == sizeof(header_)) { + DLOG(INFO) << "Perf data is in normal format."; + metadata_mask_ = header_.adds_features[0]; + return (ReadAttrs(data) && ReadEventTypes(data) && ReadData(data) + && ReadMetadata(data)); + } + + // Otherwise it is piped data. 
+ if (piped_header_.size != sizeof(piped_header_)) { + LOG(ERROR) << "Expecting piped data format, but header size " + << piped_header_.size << " does not match expected size " + << sizeof(piped_header_); + return false; + } + + return ReadPipedData(data); +} + +bool PerfReader::WriteFile(const string& filename) { + std::vector data; + return WriteToVector(&data) && WriteDataToFile(data, filename); +} + +bool PerfReader::WriteToVector(std::vector* data) { + data->resize(GetSize()); + return WriteToPointerWithoutCheckingSize(&data->at(0), data->size()); +} + +bool PerfReader::WriteToString(string* str) { + str->resize(GetSize()); + return WriteToPointerWithoutCheckingSize(&str->at(0), str->size()); +} + +bool PerfReader::WriteToPointer(char* buffer, size_t size) { + size_t required_size = GetSize(); + if (size < required_size) { + LOG(ERROR) << "Buffer is too small - buffer size is " << size + << " and required size is " << required_size; + return false; + } + return WriteToPointerWithoutCheckingSize(buffer, size); +} + + +bool PerfReader::WriteToPointerWithoutCheckingSize(char* buffer, size_t size) { + BufferWithSize data = { buffer, size }; + if (!WriteHeader(data) || + !WriteAttrs(data) || + !WriteEventTypes(data) || + !WriteData(data) || + !WriteMetadata(data)) { + return false; + } + return true; +} + +size_t PerfReader::GetSize() { + // TODO(rohinmshah): This is not a good CHECK. See TODO in perf_reader.h. + CHECK(RegenerateHeader()); + + size_t total_size = 0; + total_size = 0; + total_size += out_header_.size; + total_size += out_header_.attrs.size; + total_size += out_header_.event_types.size; + total_size += out_header_.data.size; + // Add the ID info, whose size is not explicitly included in the header. + for (size_t i = 0; i < attrs_.size(); ++i) + total_size += attrs_[i].ids.size() * sizeof(attrs_[i].ids[0]); + + // Additional info about metadata. See WriteMetadata for explanation. 
+ total_size += (GetNumMetadata() + 1) * 2 * sizeof(u64); + + // Add the sizes of the various metadata. + total_size += GetBuildIDMetadataSize(); + total_size += GetStringMetadataSize(); + total_size += GetUint32MetadataSize(); + total_size += GetUint64MetadataSize(); + total_size += GetEventDescMetadataSize(); + total_size += GetCPUTopologyMetadataSize(); + total_size += GetNUMATopologyMetadataSize(); + return total_size; +} + +bool PerfReader::RegenerateHeader() { + // This is the order of the input perf file contents in normal mode: + // 1. Header + // 2. Attribute IDs (pointed to by attr.ids.offset) + // 3. Attributes + // 4. Event types + // 5. Data + // 6. Metadata + + // Compute offsets in the above order. + CheckNoEventHeaderPadding(); + memset(&out_header_, 0, sizeof(out_header_)); + out_header_.magic = kPerfMagic; + out_header_.size = sizeof(out_header_); + out_header_.attr_size = sizeof(attrs_[0].attr) + sizeof(perf_file_section); + out_header_.attrs.size = out_header_.attr_size * attrs_.size(); + for (size_t i = 0; i < events_.size(); i++) + out_header_.data.size += events_[i]->header.size; + out_header_.event_types.size = event_types_.size() * sizeof(event_types_[0]); + + u64 current_offset = 0; + current_offset += out_header_.size; + for (size_t i = 0; i < attrs_.size(); i++) + current_offset += sizeof(attrs_[i].ids[0]) * attrs_[i].ids.size(); + out_header_.attrs.offset = current_offset; + current_offset += out_header_.attrs.size; + out_header_.event_types.offset = current_offset; + current_offset += out_header_.event_types.size; + + out_header_.data.offset = current_offset; + + // Construct the header feature bits. + memset(&out_header_.adds_features, 0, sizeof(out_header_.adds_features)); + // The following code makes the assumption that all feature bits are in the + // first word of |adds_features|. 
If the perf data format changes and the + // assumption is no longer valid, this CHECK will fail, at which point the + // below code needs to be updated. For now, sticking to that assumption keeps + // the code simple. + // This assumption is also used when reading metadata, so that code + // will also have to be updated if this CHECK starts to fail. + CHECK_LE(static_cast(HEADER_LAST_FEATURE), + BytesToBits(sizeof(out_header_.adds_features[0]))); + if (sample_type_ & PERF_SAMPLE_BRANCH_STACK) + out_header_.adds_features[0] |= (1 << HEADER_BRANCH_STACK); + out_header_.adds_features[0] |= metadata_mask_ & kSupportedMetadataMask; + + return true; +} + +bool PerfReader::InjectBuildIDs( + const std::map& filenames_to_build_ids) { + metadata_mask_ |= (1 << HEADER_BUILD_ID); + std::set updated_filenames; + // Inject new build ID's for existing build ID events. + for (size_t i = 0; i < build_id_events_.size(); ++i) { + build_id_event* event = build_id_events_[i]; + string filename = event->filename; + if (filenames_to_build_ids.find(filename) == filenames_to_build_ids.end()) + continue; + + string build_id = filenames_to_build_ids.at(filename); + PerfizeBuildIDString(&build_id); + // Changing a build id should always result in an update, never creation. + CHECK_EQ(event, CreateOrUpdateBuildID(build_id, "", 0, event)); + updated_filenames.insert(filename); + } + + // For files with no existing build ID events, create new build ID events. + // This requires a lookup of all MMAP's to determine the |misc| field of each + // build ID event. 
+ std::map filename_to_misc; + for (size_t i = 0; i < events_.size(); ++i) { + const event_t& event = *events_[i]; + if (event.header.type != PERF_RECORD_MMAP) + continue; + filename_to_misc[event.mmap.filename] = event.header.misc; + } + + std::map::const_iterator it; + for (it = filenames_to_build_ids.begin(); + it != filenames_to_build_ids.end(); + ++it) { + const string& filename = it->first; + if (updated_filenames.find(filename) != updated_filenames.end()) + continue; + + // Determine the misc field. + uint16 new_misc = PERF_RECORD_MISC_KERNEL; + std::map::const_iterator misc_iter = + filename_to_misc.find(filename); + if (misc_iter != filename_to_misc.end()) + new_misc = misc_iter->second; + + string build_id = it->second; + PerfizeBuildIDString(&build_id); + build_id_event* event = + CreateOrUpdateBuildID(build_id, filename, new_misc, NULL); + CHECK(event); + build_id_events_.push_back(event); + } + + return true; +} + +bool PerfReader::Localize( + const std::map& build_ids_to_filenames) { + std::map perfized_build_ids_to_filenames; + std::map::const_iterator it; + for (it = build_ids_to_filenames.begin(); + it != build_ids_to_filenames.end(); + ++it) { + string build_id = it->first; + PerfizeBuildIDString(&build_id); + perfized_build_ids_to_filenames[build_id] = it->second; + } + + std::map filename_map; + for (size_t i = 0; i < build_id_events_.size(); ++i) { + build_id_event* event = build_id_events_[i]; + string build_id = HexToString(event->build_id, kBuildIDArraySize); + if (perfized_build_ids_to_filenames.find(build_id) == + perfized_build_ids_to_filenames.end()) { + continue; + } + + string new_name = perfized_build_ids_to_filenames.at(build_id); + filename_map[string(event->filename)] = new_name; + build_id_event* new_event = CreateOrUpdateBuildID("", new_name, 0, event); + CHECK(new_event); + build_id_events_[i] = new_event; + } + + LocalizeUsingFilenames(filename_map); + return true; +} + +bool PerfReader::LocalizeUsingFilenames( + const 
std::map& filename_map) { + LocalizeMMapFilenames(filename_map); + for (size_t i = 0; i < build_id_events_.size(); ++i) { + build_id_event* event = build_id_events_[i]; + string old_name = event->filename; + + if (filename_map.find(event->filename) != filename_map.end()) { + const string& new_name = filename_map.at(old_name); + build_id_event* new_event = CreateOrUpdateBuildID("", new_name, 0, event); + CHECK(new_event); + build_id_events_[i] = new_event; + } + } + return true; +} + +void PerfReader::GetFilenames(std::vector* filenames) const { + std::set filename_set; + GetFilenamesAsSet(&filename_set); + filenames->clear(); + filenames->insert(filenames->begin(), filename_set.begin(), + filename_set.end()); +} + +void PerfReader::GetFilenamesAsSet(std::set* filenames) const { + filenames->clear(); + for (size_t i = 0; i < events_.size(); ++i) { + const event_t& event = *events_[i]; + if (event.header.type == PERF_RECORD_MMAP) + filenames->insert(event.mmap.filename); + } +} + +void PerfReader::GetFilenamesToBuildIDs( + std::map* filenames_to_build_ids) const { + filenames_to_build_ids->clear(); + for (size_t i = 0; i < build_id_events_.size(); ++i) { + const build_id_event& event = *build_id_events_[i]; + string build_id = HexToString(event.build_id, kBuildIDArraySize); + (*filenames_to_build_ids)[event.filename] = build_id; + } +} + +bool PerfReader::IsSupportedEventType(uint32 type) { + switch (type) { + case PERF_RECORD_SAMPLE: + case PERF_RECORD_MMAP: + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: + case PERF_RECORD_COMM: + case PERF_RECORD_LOST: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: + return true; + case PERF_RECORD_READ: + case PERF_RECORD_MAX: + return false; + default: + LOG(FATAL) << "Unknown event type " << type; + return false; + } +} + +bool PerfReader::ReadPerfSampleInfo(const event_t& event, + struct perf_sample* sample) const { + CHECK(sample); + + if (!IsSupportedEventType(event.header.type)) { + LOG(ERROR) << "Unsupported 
event type " << event.header.type; + return false; + } + + uint64 sample_format = GetSampleFieldsForEventType(event.header.type, + sample_type_); + uint64 offset = GetPerfSampleDataOffset(event); + size_t size_read = ReadPerfSampleFromData( + reinterpret_cast(&event) + offset / sizeof(uint64), + sample_format, + read_format_, + is_cross_endian_, + sample); + + if (event.header.type == PERF_RECORD_SAMPLE) { + sample->pid = event.ip.pid; + sample->tid = event.ip.tid; + if (is_cross_endian_) { + ByteSwap(&sample->pid); + ByteSwap(&sample->tid); + } + } + + size_t expected_size = event.header.size - offset; + if (size_read != expected_size) { + LOG(ERROR) << "Read " << size_read << " bytes, expected " + << expected_size << " bytes."; + } + + return (size_read == expected_size); +} + +bool PerfReader::WritePerfSampleInfo(const perf_sample& sample, + event_t* event) const { + CHECK(event); + + if (!IsSupportedEventType(event->header.type)) { + LOG(ERROR) << "Unsupported event type " << event->header.type; + return false; + } + + uint64 sample_format = GetSampleFieldsForEventType(event->header.type, + sample_type_); + uint64 offset = GetPerfSampleDataOffset(*event); + + size_t expected_size = event->header.size - offset; + memset(reinterpret_cast(event) + offset, 0, expected_size); + size_t size_written = WritePerfSampleToData( + sample, + sample_format, + read_format_, + reinterpret_cast(event) + offset / sizeof(uint64)); + if (size_written != expected_size) { + LOG(ERROR) << "Wrote " << size_written << " bytes, expected " + << expected_size << " bytes."; + } + + return (size_written == expected_size); +} + +bool PerfReader::ReadHeader(const ConstBufferWithSize& data) { + CheckNoEventHeaderPadding(); + size_t offset = 0; + if (!ReadDataFromBuffer(data, sizeof(header_), "header data", + &offset, &header_)) { + return false; + } + if (header_.magic != kPerfMagic && header_.magic != bswap_64(kPerfMagic)) { + LOG(ERROR) << "Read wrong magic. 
Expected: " << kPerfMagic + << " or " << bswap_64(kPerfMagic) + << " Got: " << header_.magic; + return false; + } + is_cross_endian_ = (header_.magic != kPerfMagic); + if (is_cross_endian_) + ByteSwap(&header_.size); + + // Header can be a piped header. + if (header_.size != sizeof(header_)) + return true; + + DLOG(INFO) << "event_types.size: " << header_.event_types.size; + DLOG(INFO) << "event_types.offset: " << header_.event_types.offset; + + return true; +} + +bool PerfReader::ReadAttrs(const ConstBufferWithSize& data) { + size_t num_attrs = header_.attrs.size / header_.attr_size; + size_t offset = header_.attrs.offset; + for (size_t i = 0; i < num_attrs; i++) { + if (!ReadAttr(data, &offset)) + return false; + } + return true; +} + +bool PerfReader::ReadAttr(const ConstBufferWithSize& data, size_t* offset) { + PerfFileAttr attr; + if (!ReadEventAttr(data, offset, &attr.attr)) + return false; + + perf_file_section ids; + if (!ReadDataFromBuffer(data, sizeof(ids), "ID section info", offset, &ids)) + return false; + if (is_cross_endian_) { + ByteSwap(&ids.offset); + ByteSwap(&ids.size); + } + + size_t num_ids = ids.size / sizeof(attr.ids[0]); + // Convert the offset from u64 to size_t. 
+ size_t ids_offset = ids.offset; + if (!ReadUniqueIDs(data, num_ids, &ids_offset, &attr.ids)) + return false; + attrs_.push_back(attr); + return true; +} + +bool PerfReader::ReadEventAttr(const ConstBufferWithSize& data, size_t* offset, + perf_event_attr* attr) { + CheckNoPerfEventAttrPadding(); + if (!ReadDataFromBuffer(data, sizeof(*attr), "attribute", offset, attr)) + return false; + + if (is_cross_endian_) { + ByteSwap(&attr->type); + ByteSwap(&attr->size); + ByteSwap(&attr->config); + ByteSwap(&attr->sample_period); + ByteSwap(&attr->sample_type); + ByteSwap(&attr->read_format); + ByteSwap(&attr->wakeup_events); + ByteSwap(&attr->bp_type); + ByteSwap(&attr->bp_addr); + ByteSwap(&attr->bp_len); + ByteSwap(&attr->branch_sample_type); + } + + // The actual perf_event_attr data size might be different from the size of + // the struct definition. Check against perf_event_attr's |size| field. + int size_diff = attr->size - sizeof(*attr); + *offset += size_diff; + attr->size = sizeof(*attr); + + // Assign sample type if it hasn't been assigned, otherwise make sure all + // subsequent attributes have the same sample type bits set. 
+ if (sample_type_ == 0) { + sample_type_ = attr->sample_type; + } else { + CHECK_EQ(sample_type_, attr->sample_type) + << "Event type sample format does not match sample format of other " + << "event type."; + } + + if (read_format_ == 0) { + read_format_ = attr->read_format; + } else { + CHECK_EQ(read_format_, attr->read_format) + << "Event type read format does not match read format of other event " + << "types."; + } + + return true; +} + +bool PerfReader::ReadUniqueIDs(const ConstBufferWithSize& data, size_t num_ids, + size_t* offset, std::vector* ids) { + ids->resize(num_ids); + for (size_t j = 0; j < num_ids; j++) { + if (!ReadDataFromBuffer(data, sizeof(ids->at(j)), "ID", offset, + &ids->at(j))) { + return false; + } + if (is_cross_endian_) + ByteSwap(&ids->at(j)); + } + return true; +} + +bool PerfReader::ReadEventTypes(const ConstBufferWithSize& data) { + size_t num_event_types = header_.event_types.size / + sizeof(struct perf_trace_event_type); + CHECK_EQ(sizeof(perf_trace_event_type) * num_event_types, + header_.event_types.size); + size_t offset = header_.event_types.offset; + for (size_t i = 0; i < num_event_types; ++i) { + if (!ReadEventType(data, &offset)) + return false; + } + return true; +} + +bool PerfReader::ReadEventType(const ConstBufferWithSize& data, + size_t* offset) { + CheckNoEventTypePadding(); + perf_trace_event_type type; + memset(&type, 0, sizeof(type)); + if (!ReadDataFromBuffer(data, sizeof(type.event_id), "event id", + offset, &type.event_id)) { + return false; + } + const char* event_name = reinterpret_cast(data.ptr + *offset); + CHECK_GT(snprintf(type.name, sizeof(type.name), "%s", event_name), 0); + *offset += sizeof(type.name); + event_types_.push_back(type); + return true; +} + +bool PerfReader::ReadData(const ConstBufferWithSize& data) { + u64 data_remaining_bytes = header_.data.size; + size_t offset = header_.data.offset; + while (data_remaining_bytes != 0) { + if (data.size < offset) { + LOG(ERROR) << "Not enough data to 
read a perf event."; + return false; + } + + const event_t* event = reinterpret_cast(data.ptr + offset); + if (!ReadPerfEventBlock(*event)) + return false; + data_remaining_bytes -= event->header.size; + offset += event->header.size; + } + + DLOG(INFO) << "Number of events stored: "<< events_.size(); + return true; +} + +bool PerfReader::ReadMetadata(const ConstBufferWithSize& data) { + size_t offset = header_.data.offset + header_.data.size; + + for (u32 type = HEADER_FIRST_FEATURE; type != HEADER_LAST_FEATURE; ++type) { + if ((metadata_mask_ & (1 << type)) == 0) + continue; + + if (data.size < offset) { + LOG(ERROR) << "Not enough data to read offset and size of metadata."; + return false; + } + + u64 metadata_offset, metadata_size; + if (!ReadDataFromBuffer(data, sizeof(metadata_offset), "metadata offset", + &offset, &metadata_offset) || + !ReadDataFromBuffer(data, sizeof(metadata_size), "metadata size", + &offset, &metadata_size)) { + return false; + } + + if (data.size < metadata_offset + metadata_size) { + LOG(ERROR) << "Not enough data to read metadata."; + return false; + } + + switch (type) { + case HEADER_BUILD_ID: + if (!ReadBuildIDMetadata(data, type, metadata_offset, metadata_size)) + return false; + break; + case HEADER_HOSTNAME: + case HEADER_OSRELEASE: + case HEADER_VERSION: + case HEADER_ARCH: + case HEADER_CPUDESC: + case HEADER_CPUID: + case HEADER_CMDLINE: + if (!ReadStringMetadata(data, type, metadata_offset, metadata_size)) + return false; + break; + case HEADER_NRCPUS: + if (!ReadUint32Metadata(data, type, metadata_offset, metadata_size)) + return false; + break; + case HEADER_TOTAL_MEM: + if (!ReadUint64Metadata(data, type, metadata_offset, metadata_size)) + return false; + break; + case HEADER_EVENT_DESC: + break; + case HEADER_CPU_TOPOLOGY: + if (!ReadCPUTopologyMetadata(data, type, metadata_offset, metadata_size)) + return false; + break; + case HEADER_NUMA_TOPOLOGY: + if (!ReadNUMATopologyMetadata(data, type, metadata_offset, 
metadata_size)) + return false; + break; + case HEADER_BRANCH_STACK: + continue; + default: LOG(INFO) << "Unsupported metadata type: " << type; + break; + } + } + + // Event type events are optional in some newer versions of perf. They + // contain the same information that is already in |attrs_|. Make sure the + // number of event types matches the number of attrs, but only if there are + // event type events present. + if (event_types_.size() > 0) { + if (event_types_.size() != attrs_.size()) { + LOG(ERROR) << "Mismatch between number of event type events and attr " + << "events: " << event_types_.size() << " vs " + << attrs_.size(); + return false; + } + metadata_mask_ |= (1 << HEADER_EVENT_DESC); + } + return true; +} + +bool PerfReader::ReadBuildIDMetadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size) { + CheckNoBuildIDEventPadding(); + while (size > 0) { + // Make sure there is enough data for everything but the filename. + if (data.size < offset + sizeof(build_id_event) / sizeof(*data.ptr)) { + LOG(ERROR) << "Not enough bytes to read build id event"; + return false; + } + + const build_id_event* temp_ptr = + reinterpret_cast(data.ptr + offset); + u16 event_size = temp_ptr->header.size; + if (is_cross_endian_) + ByteSwap(&event_size); + + // Make sure there is enough data for the rest of the event. + if (data.size < offset + event_size / sizeof(*data.ptr)) { + LOG(ERROR) << "Not enough bytes to read build id event"; + return false; + } + + // Allocate memory for the event and copy over the bytes. + build_id_event* event = CallocMemoryForBuildID(event_size); + if (!ReadDataFromBuffer(data, event_size, "build id event", + &offset, event)) { + return false; + } + if (is_cross_endian_) { + ByteSwap(&event->header.type); + ByteSwap(&event->header.misc); + ByteSwap(&event->header.size); + ByteSwap(&event->pid); + } + size -= event_size; + + // Perf tends to use more space than necessary, so fix the size. 
+ event->header.size = + sizeof(*event) + GetUint64AlignedStringLength(event->filename); + build_id_events_.push_back(event); + } + + return true; +} + +bool PerfReader::ReadStringMetadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size) { + PerfStringMetadata str_data; + str_data.type = type; + + size_t start_offset = offset; + // Skip the number of string data if it is present. + if (NeedsNumberOfStringData(type)) + offset += sizeof(num_string_data_type) / sizeof(*data.ptr); + + while ((offset - start_offset) < size) { + CStringWithLength single_string; + if (!ReadStringFromBuffer(data, is_cross_endian_, &offset, &single_string)) + return false; + str_data.data.push_back(single_string); + } + + string_metadata_.push_back(str_data); + return true; +} + +bool PerfReader::ReadUint32Metadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size) { + PerfUint32Metadata uint32_data; + uint32_data.type = type; + + size_t start_offset = offset; + while (size > offset - start_offset) { + uint32 item; + if (!ReadDataFromBuffer(data, sizeof(item), "uint32 data", &offset, &item)) + return false; + if (is_cross_endian_) + ByteSwap(&item); + uint32_data.data.push_back(item); + } + + uint32_metadata_.push_back(uint32_data); + return true; +} + +bool PerfReader::ReadUint64Metadata(const ConstBufferWithSize& data, u32 type, + size_t offset, size_t size) { + PerfUint64Metadata uint64_data; + uint64_data.type = type; + + size_t start_offset = offset; + while (size > offset - start_offset) { + uint64 item; + if (!ReadDataFromBuffer(data, sizeof(item), "uint64 data", &offset, &item)) + return false; + if (is_cross_endian_) + ByteSwap(&item); + uint64_data.data.push_back(item); + } + + uint64_metadata_.push_back(uint64_data); + return true; +} + +bool PerfReader::ReadCPUTopologyMetadata( + const ConstBufferWithSize& data, u32 type, size_t offset, size_t size) { + num_siblings_type num_core_siblings; + if (!ReadDataFromBuffer(data, 
sizeof(num_core_siblings), "num cores", + &offset, &num_core_siblings)) { + return false; + } + if (is_cross_endian_) + ByteSwap(&num_core_siblings); + + cpu_topology_.core_siblings.resize(num_core_siblings); + for (size_t i = 0; i < num_core_siblings; ++i) { + if (!ReadStringFromBuffer(data, is_cross_endian_, &offset, + &cpu_topology_.core_siblings[i])) { + return false; + } + } + + num_siblings_type num_thread_siblings; + if (!ReadDataFromBuffer(data, sizeof(num_thread_siblings), "num threads", + &offset, &num_thread_siblings)) { + return false; + } + if (is_cross_endian_) + ByteSwap(&num_thread_siblings); + + cpu_topology_.thread_siblings.resize(num_thread_siblings); + for (size_t i = 0; i < num_thread_siblings; ++i) { + if (!ReadStringFromBuffer(data, is_cross_endian_, &offset, + &cpu_topology_.thread_siblings[i])) { + return false; + } + } + + return true; +} + +bool PerfReader::ReadNUMATopologyMetadata( + const ConstBufferWithSize& data, u32 type, size_t offset, size_t size) { + numa_topology_num_nodes_type num_nodes; + if (!ReadDataFromBuffer(data, sizeof(num_nodes), "num nodes", + &offset, &num_nodes)) { + return false; + } + if (is_cross_endian_) + ByteSwap(&num_nodes); + + for (size_t i = 0; i < num_nodes; ++i) { + PerfNodeTopologyMetadata node; + if (!ReadDataFromBuffer(data, sizeof(node.id), "node id", + &offset, &node.id) || + !ReadDataFromBuffer(data, sizeof(node.total_memory), + "node total memory", &offset, + &node.total_memory) || + !ReadDataFromBuffer(data, sizeof(node.free_memory), + "node free memory", &offset, &node.free_memory) || + !ReadStringFromBuffer(data, is_cross_endian_, &offset, + &node.cpu_list)) { + return false; + } + if (is_cross_endian_) { + ByteSwap(&node.id); + ByteSwap(&node.total_memory); + ByteSwap(&node.free_memory); + } + numa_topology_.push_back(node); + } + return true; +} + +bool PerfReader::ReadPipedData(const ConstBufferWithSize& data) { + size_t offset = piped_header_.size; + bool result = true; + metadata_mask_ = 0; 
+ CheckNoEventHeaderPadding(); + + while (offset < data.size && result) { + perf_event_header header; + if (offset + sizeof(header) > data.size) { + LOG(ERROR) << "Not enough bytes left in data to read header. Required: " + << sizeof(header) << " bytes. Available: " + << data.size - offset << " bytes."; + return true; + } + + // Copy the header and swap bytes if necessary. + header = *reinterpret_cast(data.ptr + offset); + if (is_cross_endian_) { + ByteSwap(&header.type); + ByteSwap(&header.misc); + ByteSwap(&header.size); + } + + if (data.size < offset + header.size) { + LOG(ERROR) << "Not enough bytes to read piped event. Required: " + << header.size << " bytes. Available: " + << data.size - offset << " bytes."; + return true; + } + + size_t new_offset = offset + sizeof(header); + size_t size_without_header = header.size - sizeof(header); + + if (header.type < PERF_RECORD_MAX) { + const event_t* event = + reinterpret_cast(data.ptr + offset); + result = ReadPerfEventBlock(*event); + offset += header.size; + continue; + } + + switch (header.type) { + case PERF_RECORD_HEADER_ATTR: + result = ReadAttrEventBlock(data, new_offset, size_without_header); + break; + case PERF_RECORD_HEADER_EVENT_TYPE: + result = ReadEventType(data, &new_offset); + break; + case PERF_RECORD_HEADER_EVENT_DESC: + break; + case PERF_RECORD_HEADER_BUILD_ID: + metadata_mask_ |= (1 << HEADER_BUILD_ID); + result = ReadBuildIDMetadata(data, HEADER_BUILD_ID, offset, header.size); + break; + case PERF_RECORD_HEADER_HOSTNAME: + metadata_mask_ |= (1 << HEADER_HOSTNAME); + result = ReadStringMetadata(data, HEADER_HOSTNAME, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_OSRELEASE: + metadata_mask_ |= (1 << HEADER_OSRELEASE); + result = ReadStringMetadata(data, HEADER_OSRELEASE, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_VERSION: + metadata_mask_ |= (1 << HEADER_VERSION); + result = ReadStringMetadata(data, HEADER_VERSION, new_offset, + 
size_without_header); + break; + case PERF_RECORD_HEADER_ARCH: + metadata_mask_ |= (1 << HEADER_ARCH); + result = ReadStringMetadata(data, HEADER_ARCH, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_CPUDESC: + metadata_mask_ |= (1 << HEADER_CPUDESC); + result = ReadStringMetadata(data, HEADER_CPUDESC, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_CPUID: + metadata_mask_ |= (1 << HEADER_CPUID); + result = ReadStringMetadata(data, HEADER_CPUID, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_CMDLINE: + metadata_mask_ |= (1 << HEADER_CMDLINE); + result = ReadStringMetadata(data, HEADER_CMDLINE, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_NRCPUS: + metadata_mask_ |= (1 << HEADER_NRCPUS); + result = ReadUint32Metadata(data, HEADER_NRCPUS, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_TOTAL_MEM: + metadata_mask_ |= (1 << HEADER_TOTAL_MEM); + result = ReadUint64Metadata(data, HEADER_TOTAL_MEM, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_CPU_TOPOLOGY: + metadata_mask_ |= (1 << HEADER_CPU_TOPOLOGY); + result = ReadCPUTopologyMetadata(data, HEADER_CPU_TOPOLOGY, new_offset, + size_without_header); + break; + case PERF_RECORD_HEADER_NUMA_TOPOLOGY: + metadata_mask_ |= (1 << HEADER_NUMA_TOPOLOGY); + result = ReadNUMATopologyMetadata(data, HEADER_NUMA_TOPOLOGY, new_offset, + size_without_header); + break; + default: + LOG(WARNING) << "Event type " << header.type + << " is not yet supported!"; + break; + } + offset += header.size; + } + + if (!result) { + return false; + } + // Event type events are optional in some newer versions of perf. They + // contain the same information that is already in |attrs_|. Make sure the + // number of event types matches the number of attrs, but only if there are + // event type events present. 
+ if (event_types_.size() > 0) { + if (event_types_.size() != attrs_.size()) { + LOG(ERROR) << "Mismatch between number of event type events and attr " + << "events: " << event_types_.size() << " vs " + << attrs_.size(); + return false; + } + metadata_mask_ |= (1 << HEADER_EVENT_DESC); + } + return result; +} + +bool PerfReader::WriteHeader(const BufferWithSize& data) const { + CheckNoEventHeaderPadding(); + size_t size = sizeof(out_header_); + size_t offset = 0; + return WriteDataToBuffer(&out_header_, size, "file header", &offset, data); +} + +bool PerfReader::WriteAttrs(const BufferWithSize& data) const { + CheckNoPerfEventAttrPadding(); + size_t offset = out_header_.attrs.offset; + size_t id_offset = out_header_.size; + + for (size_t i = 0; i < attrs_.size(); i++) { + const PerfFileAttr& attr = attrs_[i]; + struct perf_file_section ids; + ids.offset = id_offset; + ids.size = attr.ids.size() * sizeof(attr.ids[0]); + + for (size_t j = 0; j < attr.ids.size(); j++) { + const uint64 id = attr.ids[j]; + if (!WriteDataToBuffer(&id, sizeof(id), "ID info", &id_offset, data)) + return false; + } + + if (!WriteDataToBuffer(&attr.attr, sizeof(attr.attr), "attribute", + &offset, data) || + !WriteDataToBuffer(&ids, sizeof(ids), "ID section", &offset, data)) { + return false; + } + } + return true; +} + +bool PerfReader::WriteData(const BufferWithSize& data) const { + size_t offset = out_header_.data.offset; + for (size_t i = 0; i < events_.size(); ++i) { + if (!WriteDataToBuffer(events_[i], events_[i]->header.size, "event data", + &offset, data)) { + return false; + } + } + return true; +} + +bool PerfReader::WriteMetadata(const BufferWithSize& data) const { + size_t header_offset = out_header_.data.offset + out_header_.data.size; + + // Before writing the metadata, there is one header for each piece + // of metadata, and one extra showing the end of the file. + // Each header contains two 64-bit numbers (offset and size). 
+ size_t metadata_offset = + header_offset + (GetNumMetadata() + 1) * 2 * sizeof(u64); + + // Zero out the memory used by the headers + memset(data.ptr + header_offset, 0, + (metadata_offset - header_offset) * sizeof(*data.ptr)); + + for (u32 type = HEADER_FIRST_FEATURE; type != HEADER_LAST_FEATURE; ++type) { + if ((out_header_.adds_features[0] & (1 << type)) == 0) + continue; + + u64 start_offset = metadata_offset; + // Write actual metadata to address metadata_offset + switch (type) { + case HEADER_BUILD_ID: + if (!WriteBuildIDMetadata(type, &metadata_offset, data)) + return false; + break; + case HEADER_HOSTNAME: + case HEADER_OSRELEASE: + case HEADER_VERSION: + case HEADER_ARCH: + case HEADER_CPUDESC: + case HEADER_CPUID: + case HEADER_CMDLINE: + if (!WriteStringMetadata(type, &metadata_offset, data)) + return false; + break; + case HEADER_NRCPUS: + if (!WriteUint32Metadata(type, &metadata_offset, data)) + return false; + break; + case HEADER_TOTAL_MEM: + if (!WriteUint64Metadata(type, &metadata_offset, data)) + return false; + break; + case HEADER_EVENT_DESC: + if (!WriteEventDescMetadata(type, &metadata_offset, data)) + return false; + break; + case HEADER_CPU_TOPOLOGY: + if (!WriteCPUTopologyMetadata(type, &metadata_offset, data)) + return false; + break; + case HEADER_NUMA_TOPOLOGY: + if (!WriteNUMATopologyMetadata(type, &metadata_offset, data)) + return false; + break; + case HEADER_BRANCH_STACK: + continue; + default: LOG(ERROR) << "Unsupported metadata type: " << type; + return false; + } + + // Write metadata offset and size to address header_offset. 
+ u64 metadata_size = metadata_offset - start_offset; + if (!WriteDataToBuffer(&start_offset, sizeof(start_offset), + "metadata offset", &header_offset, data) || + !WriteDataToBuffer(&metadata_size, sizeof(metadata_size), + "metadata size", &header_offset, data)) { + return false; + } + } + + // Write the last entry - a pointer to the end of the file + if (!WriteDataToBuffer(&metadata_offset, sizeof(metadata_offset), + "metadata offset", &header_offset, data)) { + return false; + } + + return true; +} + +bool PerfReader::WriteBuildIDMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const { + CheckNoBuildIDEventPadding(); + for (size_t i = 0; i < build_id_events_.size(); ++i) { + const build_id_event* event = build_id_events_[i]; + if (!WriteDataToBuffer(event, event->header.size, "Build ID metadata", + offset, data)) { + return false; + } + } + return true; +} + +bool PerfReader::WriteStringMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const { + for (size_t i = 0; i < string_metadata_.size(); ++i) { + const PerfStringMetadata& str_data = string_metadata_[i]; + if (str_data.type == type) { + num_string_data_type num_strings = str_data.data.size(); + if (NeedsNumberOfStringData(type) && + !WriteDataToBuffer(&num_strings, sizeof(num_strings), + "number of string metadata", offset, data)) { + return false; + } + + for (size_t j = 0; j < num_strings; ++j) { + const CStringWithLength& single_string = str_data.data[j]; + if (!WriteStringToBuffer(single_string, data, offset)) + return false; + } + + return true; + } + } + LOG(ERROR) << "String metadata of type " << type << " not present"; + return false; +} + +bool PerfReader::WriteUint32Metadata(u32 type, size_t* offset, + const BufferWithSize& data) const { + for (size_t i = 0; i < uint32_metadata_.size(); ++i) { + const PerfUint32Metadata& uint32_data = uint32_metadata_[i]; + if (uint32_data.type == type) { + const std::vector& int_vector = uint32_data.data; + + for (size_t j = 0; j < 
int_vector.size(); ++j) { + if (!WriteDataToBuffer(&int_vector[j], sizeof(int_vector[j]), + "uint32 metadata", offset, data)) { + return false; + } + } + + return true; + } + } + LOG(ERROR) << "Uint32 metadata of type " << type << " not present"; + return false; +} + +bool PerfReader::WriteUint64Metadata(u32 type, size_t* offset, + const BufferWithSize& data) const { + for (size_t i = 0; i < uint64_metadata_.size(); ++i) { + const PerfUint64Metadata& uint64_data = uint64_metadata_[i]; + if (uint64_data.type == type) { + const std::vector& int_vector = uint64_data.data; + + for (size_t j = 0; j < int_vector.size(); ++j) { + if (!WriteDataToBuffer(&int_vector[j], sizeof(int_vector[j]), + "uint64 metadata", offset, data)) { + return false; + } + } + + return true; + } + } + LOG(ERROR) << "Uint64 metadata of type " << type << " not present"; + return false; +} + +bool PerfReader::WriteEventDescMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const { + CheckNoPerfEventAttrPadding(); + // There should be an attribute for each event type. 
+ CHECK_EQ(event_types_.size(), attrs_.size()); + + event_desc_num_events num_events = event_types_.size(); + if (!WriteDataToBuffer(&num_events, sizeof(num_events), + "event_desc num_events", offset, data)) { + return false; + } + event_desc_attr_size attr_size = sizeof(perf_event_attr); + if (!WriteDataToBuffer(&attr_size, sizeof(attr_size), + "event_desc attr_size", offset, data)) { + return false; + } + + for (size_t i = 0; i < num_events; ++i) { + const perf_trace_event_type event_type = event_types_[i]; + const PerfFileAttr& attr = attrs_[i]; + if (!WriteDataToBuffer(&attr.attr, sizeof(attr.attr), + "event_desc attribute", offset, data)) { + return false; + } + + event_desc_num_unique_ids num_ids = attr.ids.size(); + if (!WriteDataToBuffer(&num_ids, sizeof(num_ids), + "event_desc num_unique_ids", offset, data)) { + return false; + } + + CStringWithLength container; + container.len = GetUint64AlignedStringLength(event_type.name); + container.str = string(event_type.name); + if (!WriteStringToBuffer(container, data, offset)) + return false; + + if (!WriteDataToBuffer(&attr.ids[0], num_ids * sizeof(attr.ids[0]), + "event_desc unique_ids", offset, data)) { + return false; + } + } + return true; +} + +bool PerfReader::WriteCPUTopologyMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const { + const std::vector& cores = cpu_topology_.core_siblings; + num_siblings_type num_cores = cores.size(); + if (!WriteDataToBuffer(&num_cores, sizeof(num_cores), "num cores", + offset, data)) { + return false; + } + for (size_t i = 0; i < num_cores; ++i) { + if (!WriteStringToBuffer(cores[i], data, offset)) + return false; + } + + const std::vector& threads = cpu_topology_.thread_siblings; + num_siblings_type num_threads = threads.size(); + if (!WriteDataToBuffer(&num_threads, sizeof(num_threads), "num threads", + offset, data)) { + return false; + } + for (size_t i = 0; i < num_threads; ++i) { + if (!WriteStringToBuffer(threads[i], data, offset)) + return false; + 
} + + return true; +} + +bool PerfReader::WriteNUMATopologyMetadata(u32 type, size_t* offset, + const BufferWithSize& data) const { + numa_topology_num_nodes_type num_nodes = numa_topology_.size(); + if (!WriteDataToBuffer(&num_nodes, sizeof(num_nodes), "num nodes", + offset, data)) { + return false; + } + + for (size_t i = 0; i < num_nodes; ++i) { + const PerfNodeTopologyMetadata& node = numa_topology_[i]; + if (!WriteDataToBuffer(&node.id, sizeof(node.id), "node id", + offset, data) || + !WriteDataToBuffer(&node.total_memory, sizeof(node.total_memory), + "node total memory", offset, data) || + !WriteDataToBuffer(&node.free_memory, sizeof(node.free_memory), + "node free memory", offset, data) || + !WriteStringToBuffer(node.cpu_list, data, offset)) { + return false; + } + } + return true; +} + +bool PerfReader::WriteEventTypes(const BufferWithSize& data) const { + CheckNoEventTypePadding(); + size_t offset = out_header_.event_types.offset; + for (size_t i = 0; i < event_types_.size(); ++i) { + const struct perf_trace_event_type& event_type = event_types_[i]; + if (!WriteDataToBuffer(&event_type, sizeof(event_type), "event type info", + &offset, data)) { + return false; + } + } + return true; +} + +bool PerfReader::ReadAttrEventBlock(const ConstBufferWithSize& data, + size_t offset, size_t size) { + PerfFileAttr attr; + if (!ReadEventAttr(data, &offset, &attr.attr)) + return false; + + size_t num_ids = (size - attr.attr.size) / sizeof(attr.ids[0]); + if (!ReadUniqueIDs(data, num_ids, &offset, &attr.ids)) + return false; + + // Event types are found many times in the perf data file. + // Only add this event type if it is not already present. + for (size_t i = 0; i < attrs_.size(); ++i) { + if (attrs_[i].ids[0] == attr.ids[0]) + return true; + } + attrs_.push_back(attr); + return true; +} + +// When this method is called, |event| is a reference to the bytes in the data +// vector that contains the entire perf.data file. 
As a result, we need to be +// careful to only copy event.header.size bytes. +// In particular, something like +// event_t event_copy = event; +// would be bad, because it would read past the end of the event, and possibly +// pass the end of the data vector as well. +bool PerfReader::ReadPerfEventBlock(const event_t& event) { + u16 size = event.header.size; + if (is_cross_endian_) + ByteSwap(&size); + + if (size > sizeof(event_t)) { + LOG(INFO) << "Data size: " << size << " sizeof(event_t): " + << sizeof(event_t); + return false; + } + + // Copy only the part of the event that is needed. + event_t* event_copy = CallocMemoryForEvent(size); + memcpy(event_copy, &event, size); + if (is_cross_endian_) { + ByteSwap(&event_copy->header.type); + ByteSwap(&event_copy->header.misc); + ByteSwap(&event_copy->header.size); + } + + uint32 type = event_copy->header.type; + if (is_cross_endian_) { + switch (type) { + case PERF_RECORD_SAMPLE: + ByteSwap(&event_copy->ip.ip); + ByteSwap(&event_copy->ip.pid); + ByteSwap(&event_copy->ip.tid); + break; + case PERF_RECORD_MMAP: + ByteSwap(&event_copy->mmap.pid); + ByteSwap(&event_copy->mmap.tid); + ByteSwap(&event_copy->mmap.start); + ByteSwap(&event_copy->mmap.len); + ByteSwap(&event_copy->mmap.pgoff); + break; + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: + ByteSwap(&event_copy->fork.pid); + ByteSwap(&event_copy->fork.tid); + ByteSwap(&event_copy->fork.ppid); + ByteSwap(&event_copy->fork.ptid); + break; + case PERF_RECORD_COMM: + ByteSwap(&event_copy->comm.pid); + ByteSwap(&event_copy->comm.tid); + break; + case PERF_RECORD_LOST: + ByteSwap(&event_copy->lost.id); + ByteSwap(&event_copy->lost.lost); + break; + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: + ByteSwap(&event_copy->throttle.time); + ByteSwap(&event_copy->throttle.id); + ByteSwap(&event_copy->throttle.stream_id); + break; + case PERF_RECORD_READ: + ByteSwap(&event_copy->read.pid); + ByteSwap(&event_copy->read.tid); + ByteSwap(&event_copy->read.value); + 
ByteSwap(&event_copy->read.time_enabled); + ByteSwap(&event_copy->read.time_running); + ByteSwap(&event_copy->read.id); + break; + default: + LOG(FATAL) << "Unknown event type: " << type; + } + } + + events_.push_back(event_copy); + + return true; +} + +size_t PerfReader::GetNumMetadata() const { + // This is just the number of 1s in the binary representation of the metadata + // mask. However, make sure to only use supported metadata, and don't include + // branch stack (since it doesn't have an entry in the metadata section). + uint64 new_mask = metadata_mask_; + new_mask &= kSupportedMetadataMask & ~(1 << HEADER_BRANCH_STACK); + std::bitset bits(new_mask); + return bits.count(); +} + +size_t PerfReader::GetEventDescMetadataSize() const { + size_t size = 0; + if (metadata_mask_ & (1 << HEADER_EVENT_DESC)) { + CHECK_EQ(event_types_.size(), attrs_.size()); + size += sizeof(event_desc_num_events) + sizeof(event_desc_attr_size); + CStringWithLength dummy; + for (size_t i = 0; i < attrs_.size(); ++i) { + size += sizeof(perf_event_attr) + sizeof(dummy.len); + size += sizeof(event_desc_num_unique_ids); + size += GetUint64AlignedStringLength(event_types_[i].name) * sizeof(char); + size += attrs_[i].ids.size() * sizeof(attrs_[i].ids[0]); + } + } + return size; +} + +size_t PerfReader::GetBuildIDMetadataSize() const { + size_t size = 0; + for (size_t i = 0; i < build_id_events_.size(); ++i) + size += build_id_events_[i]->header.size; + return size; +} + +size_t PerfReader::GetStringMetadataSize() const { + size_t size = 0; + for (size_t i = 0; i < string_metadata_.size(); ++i) { + const PerfStringMetadata& metadata = string_metadata_[i]; + if (NeedsNumberOfStringData(metadata.type)) + size += sizeof(num_string_data_type); + + for (size_t j = 0; j < metadata.data.size(); ++j) { + const CStringWithLength& str = metadata.data[j]; + size += sizeof(str.len) + (str.len * sizeof(char)); + } + } + return size; +} + +size_t PerfReader::GetUint32MetadataSize() const { + size_t size 
= 0; + for (size_t i = 0; i < uint32_metadata_.size(); ++i) { + const PerfUint32Metadata& metadata = uint32_metadata_[i]; + size += metadata.data.size() * sizeof(metadata.data[0]); + } + return size; +} + +size_t PerfReader::GetUint64MetadataSize() const { + size_t size = 0; + for (size_t i = 0; i < uint64_metadata_.size(); ++i) { + const PerfUint64Metadata& metadata = uint64_metadata_[i]; + size += metadata.data.size() * sizeof(metadata.data[0]); + } + return size; +} + +size_t PerfReader::GetCPUTopologyMetadataSize() const { + // Core siblings. + size_t size = sizeof(num_siblings_type); + for (size_t i = 0; i < cpu_topology_.core_siblings.size(); ++i) { + const CStringWithLength& str = cpu_topology_.core_siblings[i]; + size += sizeof(str.len) + (str.len * sizeof(char)); + } + + // Thread siblings. + size += sizeof(num_siblings_type); + for (size_t i = 0; i < cpu_topology_.thread_siblings.size(); ++i) { + const CStringWithLength& str = cpu_topology_.thread_siblings[i]; + size += sizeof(str.len) + (str.len * sizeof(char)); + } + + return size; +} + +size_t PerfReader::GetNUMATopologyMetadataSize() const { + size_t size = sizeof(numa_topology_num_nodes_type); + for (size_t i = 0; i < numa_topology_.size(); ++i) { + const PerfNodeTopologyMetadata& node = numa_topology_[i]; + size += sizeof(node.id); + size += sizeof(node.total_memory) + sizeof(node.free_memory); + size += sizeof(node.cpu_list.len) + node.cpu_list.len * sizeof(char); + } + return size; +} + +bool PerfReader::NeedsNumberOfStringData(u32 type) const { + return type == HEADER_CMDLINE; +} + +bool PerfReader::LocalizeMMapFilenames( + const std::map& filename_map) { + // Search for mmap events for which the filename needs to be updated. 
+ for (size_t i = 0; i < events_.size(); ++i) { + event_t* event = events_[i]; + if (event->header.type != PERF_RECORD_MMAP) + continue; + + string key = string(event->mmap.filename); + if (filename_map.find(key) == filename_map.end()) + continue; + + string new_filename = filename_map.at(key); + size_t old_len = GetUint64AlignedStringLength(key); + size_t new_len = GetUint64AlignedStringLength(new_filename); + size_t old_offset = GetPerfSampleDataOffset(*event); + size_t sample_size = event->header.size - old_offset; + + int size_increase = new_len - old_len; + size_t new_size = event->header.size + size_increase; + size_t new_offset = old_offset + size_increase; + + if (size_increase > 0) { + // Allocate memory for a new event. + event_t* old_event = event; + event = CallocMemoryForEvent(new_size); + + // Copy over everything except filename and sample info. + memcpy(event, old_event, + sizeof(event->mmap) - sizeof(event->mmap.filename)); + + // Copy over the sample info to the correct location. + char* old_addr = reinterpret_cast(old_event); + char* new_addr = reinterpret_cast(event); + memcpy(new_addr + new_offset, old_addr + old_offset, sample_size); + + free(old_event); + events_[i] = event; + } else if (size_increase < 0) { + // Move the perf sample data to its new location. + // Since source and dest could overlap, use memmove instead of memcpy. + char* start_addr = reinterpret_cast(event); + memmove(start_addr + new_offset, start_addr + old_offset, sample_size); + } + + // Copy over the new filename and fix the size of the event. 
+ CHECK_GT(snprintf(event->mmap.filename, new_filename.size() + 1, "%s", + new_filename.c_str()), + 0); + event->header.size = new_size; + } + + return true; +} + +} // namespace quipper Index: lib/ProfileData/PerfConverter/quipper/quipper_string.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/quipper_string.h @@ -0,0 +1,17 @@ +//=-- quipper_string.h ------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef QUIPPER_STRING_ +#define QUIPPER_STRING_ + +#ifndef HAS_GLOBAL_STRING +using std::string; +using std::stringstream; +#endif + +#endif // QUIPPER_STRING_ Index: lib/ProfileData/PerfConverter/quipper/utils.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/utils.h @@ -0,0 +1,85 @@ +//=-- utils.h ---------------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef QUIPPER_UTILS_H_ +#define QUIPPER_UTILS_H_ + +#include +#include + +#include "llvm_port.h" + +#include "kernel/perf_internals.h" +#include "quipper_string.h" + +namespace quipper { + +// Given a valid open file handle |fp|, returns the size of the file. 
+long int GetFileSizeFromHandle(FILE* fp); + +event_t* CallocMemoryForEvent(size_t size); + +build_id_event* CallocMemoryForBuildID(size_t size); + +bool FileToBuffer(const string& filename, std::vector* contents); + +bool BufferToFile(const string& filename, const std::vector& contents); + +// Stores the value of |contents| within a compressed file with name |filename|. +bool BufferToGZFile(const string& filename, const std::vector& contents); + +// Reads a compressed file with name |filename| into |contents|. +bool GZFileToBuffer(const string& filename, std::vector* contents); + +uint64 Md5Prefix(const string& input); + +// Returns a string that represents |array| in hexadecimal. +string HexToString(const u8* array, size_t length); + +// Converts |str| to a hexadecimal number, stored in |array|. Returns true on +// success. Only stores up to |length| bytes - if there are more characters in +// the string, they are ignored (but the function may still return true). +bool StringToHex(const string& str, u8* array, size_t length); + +// Adjust |size| to blocks of |align_size|. i.e. returns the smallest multiple +// of |align_size| that can fit |size|. +uint64 AlignSize(uint64 size, uint32 align_size); + +// Given a general perf sample format |sample_type|, return the fields of that +// format that are present in a sample for an event of type |event_type|. +// +// e.g. FORK and EXIT events have the fields {time, pid/tid, cpu, id}. +// Given a sample type with fields {ip, time, pid/tid, and period}, return +// the intersection of these two field sets: {time, pid/tid}. +// +// All field formats are bitfields, as defined by enum perf_event_sample_format +// in kernel/perf_event.h. +uint64 GetSampleFieldsForEventType(uint32 event_type, uint64 sample_type); + +// Returns the offset in bytes within a perf event structure at which the raw +// perf sample data is located. 
+uint64 GetPerfSampleDataOffset(const event_t& event); + +// Returns the size of the 8-byte-aligned memory for storing |string|. +size_t GetUint64AlignedStringLength(const string& str); + +// Reads the contents of a file into |data|. Returns true on success, false if +// it fails. +bool ReadFileToData(const string& filename, std::vector* data); + +// Writes contents of |data| to a file with name |filename|, overwriting any +// existing file. Returns true on success, false if it fails. +bool WriteDataToFile(const std::vector& data, const string& filename); + +// Executes |command| and stores stdout output in |output|. Returns true on +// success, false otherwise. +bool RunCommandAndGetStdout(const string& command, std::vector* output); + +} // namespace quipper + +#endif // QUIPPER_UTILS_H_ Index: lib/ProfileData/PerfConverter/quipper/utils.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/quipper/utils.cc @@ -0,0 +1,281 @@ +//=-- utils.cc --------------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +namespace { + +// Specify buffer size to be used to read files. +// This is allocated on the stack, so make sure it's less than 16k. +const int kFileReadSize = 1024; + +// Number of hex digits in a byte. +const int kNumHexDigitsInByte = 2; + +// Initial buffer size when reading compressed files. 
+const int kInitialBufferSizeForCompressedFiles = 4096; + +} // namespace + +namespace quipper { + +long int GetFileSizeFromHandle(FILE* fp) { + long int position = ftell(fp); + fseek(fp, 0, SEEK_END); + long int file_size = ftell(fp); + // Restore the original file handle position. + fseek(fp, position, SEEK_SET); + return file_size; +} + +event_t* CallocMemoryForEvent(size_t size) { + event_t* event = reinterpret_cast(calloc(1, size)); + CHECK(event); + return event; +} + +build_id_event* CallocMemoryForBuildID(size_t size) { + build_id_event* event = reinterpret_cast(calloc(1, size)); + CHECK(event); + return event; +} + + +bool GZFileToBuffer(const string& filename, std::vector* contents) { + gzFile fp = gzopen(filename.c_str(), "rb"); + if (!fp) + return false; + size_t total_bytes_read = 0; + contents->resize(kInitialBufferSizeForCompressedFiles); + while (true) { + size_t bytes_read = gzread( + fp, + &((*contents)[total_bytes_read]), + contents->size() - total_bytes_read); + total_bytes_read += bytes_read; + if (total_bytes_read != contents->size()) + break; + contents->resize(contents->size() * 2); + } + contents->resize(total_bytes_read); + int error; + const char* error_string = gzerror(fp, &error); + gzclose(fp); + if (error != Z_STREAM_END && error != Z_OK) { + LOG(ERROR) << "Error while reading gzip file: " << error_string; + return false; + } + return true; +} + +bool BufferToGZFile(const string& filename, const std::vector& contents) { + gzFile fp; + fp = gzopen(filename.c_str(), "wb"); + if (!fp) + return false; + if (!contents.empty()) { + CHECK_GT(gzwrite(fp, + &contents[0], contents.size() * sizeof(contents[0])), 0); + } + gzclose(fp); + return true; +} + +bool BufferToFile(const string& filename, const std::vector& contents) { + FILE* fp = fopen(filename.c_str(), "wb"); + if (!fp) + return false; + // Do not write anything if |contents| contains nothing. fopen will create + // an empty file. 
+ if (!contents.empty()) { + CHECK_GT(fwrite(&contents[0], contents.size() * sizeof(contents[0]), 1, fp), + 0U); + } + fclose(fp); + return true; +} + +bool FileToBuffer(const string& filename, std::vector* contents) { + FILE* fp = fopen(filename.c_str(), "rb"); + if (!fp) + return false; + long int file_size = quipper::GetFileSizeFromHandle(fp); + contents->resize(file_size); + // Do not read anything if the file exists but is empty. + if (file_size > 0) + CHECK_GT(fread(&(*contents)[0], file_size, 1, fp), 0U); + fclose(fp); + return true; +} + +string HexToString(const u8* array, size_t length) { + // Convert the bytes to hex digits one at a time. + // There will be kNumHexDigitsInByte hex digits, and 1 char for NUL. + char buffer[kNumHexDigitsInByte + 1]; + string result = ""; + for (size_t i = 0; i < length; ++i) { + snprintf(buffer, sizeof(buffer), "%02x", array[i]); + result += buffer; + } + return result; +} + +bool StringToHex(const string& str, u8* array, size_t length) { + const int kHexRadix = 16; + char* err; + // Loop through kNumHexDigitsInByte characters at a time (to get one byte) + // Stop when there are no more characters, or the array has been filled. + for (size_t i = 0; + (i + 1) * kNumHexDigitsInByte <= str.size() && i < length; + ++i) { + string one_byte = str.substr(i * kNumHexDigitsInByte, kNumHexDigitsInByte); + array[i] = strtol(one_byte.c_str(), &err, kHexRadix); + if (*err) + return false; + } + return true; +} + +uint64 AlignSize(uint64 size, uint32 align_size) { + return ((size + align_size - 1) / align_size) * align_size; +} + +// In perf data, strings are packed into the smallest number of 8-byte blocks +// possible, including the null terminator. +// e.g. 
+// "0123" -> 5 bytes -> packed into 8 bytes +// "0123456" -> 8 bytes -> packed into 8 bytes +// "01234567" -> 9 bytes -> packed into 16 bytes +// "0123456789abcd" -> 15 bytes -> packed into 16 bytes +// "0123456789abcde" -> 16 bytes -> packed into 16 bytes +// "0123456789abcdef" -> 17 bytes -> packed into 24 bytes +// +// Returns the size of the 8-byte-aligned memory for storing |string|. +size_t GetUint64AlignedStringLength(const string& str) { + return AlignSize(str.size() + 1, sizeof(uint64)); +} + +uint64 GetSampleFieldsForEventType(uint32 event_type, uint64 sample_type) { + uint64 mask = kuint64max; + switch (event_type) { + case PERF_RECORD_SAMPLE: + // IP and pid/tid fields of sample events are read as part of event_t, so + // mask away those two fields. + mask = ~(PERF_SAMPLE_IP | PERF_SAMPLE_TID); + break; + case PERF_RECORD_MMAP: + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: + case PERF_RECORD_COMM: + case PERF_RECORD_LOST: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: + mask = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID | + PERF_SAMPLE_CPU; + break; + case PERF_RECORD_READ: + break; + default: + LOG(FATAL) << "Unknown event type " << event_type; + } + return sample_type & mask; +} + +uint64 GetPerfSampleDataOffset(const event_t& event) { + uint64 offset = kuint64max; + switch (event.header.type) { + case PERF_RECORD_SAMPLE: + offset = sizeof(event.ip); + break; + case PERF_RECORD_MMAP: + offset = sizeof(event.mmap) - sizeof(event.mmap.filename) + + GetUint64AlignedStringLength(event.mmap.filename); + break; + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: + offset = sizeof(event.fork); + break; + case PERF_RECORD_COMM: + offset = sizeof(event.comm) - sizeof(event.comm.comm) + + GetUint64AlignedStringLength(event.comm.comm); + break; + case PERF_RECORD_LOST: + offset = sizeof(event.lost); + break; + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: + offset = sizeof(event.throttle); + break; + case PERF_RECORD_READ: + 
offset = sizeof(event.read); + break; + default: + LOG(FATAL) << "Unknown event type " << event.header.type; + break; + } + // Make sure the offset was valid + CHECK_NE(offset, kuint64max); + CHECK_EQ(offset % sizeof(uint64), 0U); + return offset; +} + +bool ReadFileToData(const string& filename, std::vector* data) { + std::ifstream in(filename.c_str(), std::ios::binary); + if (!in.good()) { + LOG(ERROR) << "Failed to open file " << filename; + return false; + } + in.seekg(0, in.end); + size_t length = in.tellg(); + in.seekg(0, in.beg); + data->resize(length); + + in.read(&(*data)[0], length); + + if (!in.good()) { + LOG(ERROR) << "Error reading from file " << filename; + return false; + } + return true; +} + +bool WriteDataToFile(const std::vector& data, const string& filename) { + std::ofstream out(filename.c_str(), std::ios::binary); + out.seekp(0, std::ios::beg); + out.write(&data[0], data.size()); + return out.good(); +} + +bool RunCommandAndGetStdout(const string& command, std::vector* output) { + FILE* fp = popen(command.c_str(), "r"); + if (!fp) + return false; + + output->clear(); + char buf[kFileReadSize]; + while (!feof(fp)) { + size_t size_read = fread(buf, 1, sizeof(buf), fp); + size_t prev_size = output->size(); + output->resize(prev_size + size_read); + memcpy(&(*output)[prev_size], buf, size_read); + } + if (pclose(fp)) + return false; + + return true; +} + +} // namespace quipper Index: lib/ProfileData/PerfConverter/sample_reader.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/sample_reader.cpp @@ -0,0 +1,127 @@ +//=-- sample_reader.cc - Read samples from the profile ----------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include "llvm/ProfileData/SampleProfileReader.h" + +#include +#include + +#include "quipper/perf_parser.h" + +namespace { +// Returns true if name equals full_name, or full_name is empty and name +// matches re. +bool MatchBinary(const string &name, const string &full_name) { + return full_name == basename(name.c_str()); +} +} // namespace + +namespace samplepgo { +set SampleReader::getSampledAddresses() const { + set addrs; + if (RangeCount.size() > 0) { + for (const auto &range_count : RangeCount) { + addrs.insert(range_count.first.first); + } + } else { + for (const auto &addr_count : AddressCount) { + addrs.insert(addr_count.first); + } + } + return addrs; +} + +uint64_t SampleReader::getSampleCount(uint64_t addr) const { + AddressCountMap::const_iterator iter = AddressCount.find(addr); + if (iter == AddressCount.end()) + return 0; + + return iter->second; +} + +uint64_t SampleReader::getTotalSampleCount() const { + uint64_t ret = 0; + + if (RangeCount.size() > 0) { + for (const auto &range_count : RangeCount) { + ret += range_count.second; + } + } else { + for (const auto &addr_count : AddressCount) { + ret += addr_count.second; + } + } + return ret; +} + +bool SampleReader::readAndSetMaxCount() { + if (!read()) { + return false; + } + if (RangeCount.size() > 0) { + for (const auto &range_count : RangeCount) { + MaxCount = max(MaxCount, range_count.second); + } + } else { + for (const auto &addr_count : AddressCount) { + if (addr_count.second > MaxCount) { + MaxCount = addr_count.second; + } + } + } + return true; +} + +bool FileSampleReader::read() { return append(ProfileFile); } + +bool PerfDataSampleReader::append(StringRef ProfileFile) { + quipper::PerfParser parser; + if (!parser.ReadFile(ProfileFile) || !parser.ParseRawEvents()) { + return false; + } + + string focus_binary = FocusBinaryRE; + + // If we can find build_id from binary, and the exact build_id was found + 
// in the profile, then we use focus_binary to match samples. Otherwise, + // focus_binary_re_ is used to match the binary name with the samples. + for (const auto &event : parser.parsed_events()) { + if (!*event.raw_event || + (*event.raw_event)->header.type != PERF_RECORD_SAMPLE) { + continue; + } + if (MatchBinary(event.dso_and_offset.dso_name(), focus_binary)) { + AddressCount[event.dso_and_offset.offset()]++; + } + if (event.branch_stack.size() > 0 && + MatchBinary(event.branch_stack[0].to.dso_name(), focus_binary) && + MatchBinary(event.branch_stack[0].from.dso_name(), focus_binary)) { + BranchCount[Branch(event.branch_stack[0].from.offset(), + event.branch_stack[0].to.offset())]++; + } + for (unsigned i = 1; i < event.branch_stack.size(); i++) { + if (!MatchBinary(event.branch_stack[i].to.dso_name(), focus_binary)) { + continue; + } + uint64_t begin = event.branch_stack[i].to.offset(); + uint64_t end = event.branch_stack[i - 1].from.offset(); + // The interval between two taken branches should not be too large. + if (end < begin || end - begin > (1 << 20)) { + LOG(WARNING) << "Bogus LBR data: " << begin << "->" << end; + continue; + } + RangeCount[Range(begin, end)]++; + if (MatchBinary(event.branch_stack[i].from.dso_name(), focus_binary)) { + BranchCount[Branch(event.branch_stack[i].from.offset(), + event.branch_stack[i].to.offset())]++; + } + } + } + return true; +} +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/source_info.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/source_info.h @@ -0,0 +1,59 @@ +//=-- source_info.h - Representation of source information ------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SOURCE_INFO_H_ +#define AUTOFDO_SOURCE_INFO_H_ + +#include +#include + +#include "llvm/Support/DataTypes.h" + +using namespace std; + +namespace samplepgo { + +// Represents the source position. +struct SourceInfo { + SourceInfo() + : func_name(NULL), dir_name(NULL), file_name(NULL), start_line(0), + line(0), discriminator(0) {} + + SourceInfo(const char *func_name, const char *dir_name, const char *file_name, + uint32_t start_line, uint32_t line, uint32_t discriminator) + : func_name(func_name), dir_name(dir_name), file_name(file_name), + start_line(start_line), line(line), discriminator(discriminator) {} + + bool operator<(const SourceInfo &p) const; + + string RelativePath() const { + if (dir_name && *dir_name) + return string(dir_name) + "/" + string(file_name); + if (file_name) + return string(file_name); + return string(); + } + + uint32_t Offset() const { + return ((line - start_line) << 16) | discriminator; + } + + bool Malformed() const { return line < start_line; } + + const char *func_name; + const char *dir_name; + const char *file_name; + uint32_t start_line; + uint32_t line; + uint32_t discriminator; +}; + +typedef vector SourceStack; +} // namespace samplepgo + +#endif // AUTOFDO_SOURCE_INFO_H_ Index: lib/ProfileData/PerfConverter/source_info.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/source_info.cpp @@ -0,0 +1,46 @@ +//=-- source_info.cc - Representation of source information -----------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include + +#include "source_info.h" + +namespace { +int StrcmpMaybeNull(const char *a, const char *b) { + if (a == nullptr) { + a = ""; + } + if (b == nullptr) { + b = ""; + } + return strcmp(a, b); +} +} // namespace + +namespace samplepgo { +bool SourceInfo::operator<(const SourceInfo &p) const { + if (line != p.line) { + return line < p.line; + } + if (start_line != p.start_line) { + return start_line < p.start_line; + } + if (discriminator != p.discriminator) { + return discriminator < p.discriminator; + } + int ret = StrcmpMaybeNull(func_name, p.func_name); + if (ret != 0) { + return ret < 0; + } + ret = StrcmpMaybeNull(file_name, p.file_name); + if (ret != 0) { + return ret < 0; + } + return StrcmpMaybeNull(dir_name, p.dir_name) < 0; +} +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/symbol_map.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbol_map.h @@ -0,0 +1,264 @@ +//=-- symbol_map.h - Class to represent the symbol map ----------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// The symbol map is a map from symbol names to the symbol class. +// This class is thread-safe. +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOL_MAP_H_ +#define AUTOFDO_SYMBOL_MAP_H_ + +#include +#include +#include +#include +#include + +#include + +#include "llvm/Support/DataTypes.h" + +#include "source_info.h" + +namespace samplepgo { + +typedef map CallTargetCountMap; +typedef pair TargetCountPair; +typedef vector TargetCountPairs; + +class Addr2line; + +// Returns a sorted vector of target_count pairs. 
target_counts is a pointer +// to an empty vector in which the output will be stored. +// Sorting is based on count in descending order. +void GetSortedTargetCountPairs(const CallTargetCountMap &call_target_count_map, + TargetCountPairs *target_counts); + +// Represents profile information of a given source. +class ProfileInfo { +public: + ProfileInfo() : count(0), num_inst(0) {} + ProfileInfo &operator+=(const ProfileInfo &other); + + uint64_t count; + uint64_t num_inst; + CallTargetCountMap target_map; +}; + +// Map from a source location (represented by offset+discriminator) to profile. +typedef map PositionCountMap; + +// callsite_location, callee_name +typedef pair Callsite; + +struct CallsiteLess { + bool operator()(const Callsite &c1, const Callsite &c2) const { + if (c1.first != c2.first) + return c1.first < c2.first; + if ((c1.second == NULL || c2.second == NULL)) + return c1.second < c2.second; + return strcmp(c1.second, c2.second) < 0; + } +}; +class Symbol; +// Map from a callsite to the callee symbol. +typedef map CallsiteMap; + +// Contains information about a specific symbol. +// There are two types of symbols: +// 1. Actual symbol: the symbol exists in the binary as a standalone function. +// It has the begin_address and end_address, and its name +// is always full assembler name. +// 2. Inlined symbol: the symbol is cloned in another function. It does not +// have the begin_address and end_address, and its name +// could be a short bfd_name. +class Symbol { +public: + // This constructor is used to create inlined symbol. + Symbol(const char *name, const char *dir, const char *file, uint32_t start) + : info(SourceInfo(name, dir, file, start, 0, 0)), total_count(0), + head_count(0) {} + + Symbol() : total_count(0), head_count(0) {} + + ~Symbol(); + + // Merges profile stored in src symbol with this symbol. + void Merge(const Symbol *src); + + // Returns the module name of the symbol. Module name is the source file + // that the symbol belongs to. 
It is an attribute of the actual symbol. + string ModuleName() const; + + // Returns true if the symbol is from a header file. + bool IsFromHeader() const; + + // Dumps content of the symbol with a given indentation. + void Dump(int indent) const; + + // Source information about the symbol (func_name, file_name, etc.) + SourceInfo info; + // The total sampled count. + uint64_t total_count; + // The total sampled count in the head bb. + uint64_t head_count; + // Map from callsite location to callee symbol. + CallsiteMap callsites; + // Map from source location to count and instruction number. + PositionCountMap pos_counts; +}; + +// Maps function name to actual symbol. (Top level map). +typedef map NameSymbolMap; +// Maps symbol's start address to its name and size. +typedef map> AddressSymbolMap; +// Maps from symbol's name to its start address. +typedef map NameAddressMap; +// Maps function name to alias names. +typedef map> NameAliasMap; + +// SymbolMap stores the symbols in the binary, and maintains +// a map from symbol name to its related information. +class SymbolMap { +public: + explicit SymbolMap(const string &binary) : binary_(binary), base_addr_(0) { + BuildSymbolMap(); + BuildNameAddressMap(); + } + + explicit SymbolMap() {} + + ~SymbolMap(); + + uint64_t size() const { return map_.size(); } + + // Returns total sample counts in the profile. + uint64_t TotalCount() const; + + // Returns relocation start address. + uint64_t base_addr() const { return base_addr_; } + + // Adds an empty named symbol. 
+ void AddSymbol(const string &name); + + const NameSymbolMap &map() const { return map_; } + + uint64_t GetSymbolStartAddr(const string &name) const { + const auto &iter = name_addr_map_.find(name); + if (iter == name_addr_map_.end()) { + return 0; + } + return iter->second; + } + + const Symbol *GetSymbolByName(const string &name) const { + NameSymbolMap::const_iterator ret = map_.find(name); + if (ret != map_.end()) { + return ret->second; + } else { + return NULL; + } + } + + // Merges symbols with suffixes like .isra, .part as a single symbol. + void Merge(); + + // Increments symbol's entry count. + void AddSymbolEntryCount(const string &symbol, uint64_t count); + + typedef enum { INVALID = 1, SUM, MAX } Operation; + // Increments source stack's count. + // symbol: name of the symbol in which source is located. + // source: source location (in terms of inlined source stack). + // count: total sampled count. + // num_inst: number of instructions that is mapped to the source. + // op: operation used to calculate count (SUM or MAX). + void AddSourceCount(const string &symbol, const SourceStack &source, + uint64_t count, uint64_t num_inst, Operation op); + + // Adds the indirect call target to source stack. + // symbol: name of the symbol in which source is located. + // source: source location (in terms of inlined source stack). + // target: indirect call target. + // count: total sampled count. + void AddIndirectCallTarget(const string &symbol, const SourceStack &source, + const string &target, uint64_t count); + + // Traverses the inline stack in source, update the symbol map by adding + // count to the total count in the inlined symbol. Returns the leaf symbol. + Symbol *TraverseInlineStack(const string &symbol, const SourceStack &source, + uint64_t count); + + // Updates function name, start_addr, end_addr of a function that has a + // given address. Returns false if no such symbol exists. 
+ bool GetSymbolInfoByAddr(uint64_t addr, const string **name, + uint64_t *start_addr, uint64_t *end_addr) const; + + // Returns a pointer to the symbol name for a given start address. Returns + // NULL if no such symbol exists. + const string *GetSymbolNameByStartAddr(uint64_t address) const; + + // Returns the overlap between two symbol maps. For two profiles, if + // count_i_j denotes the function count of the ith function in profile j; + // total_j denotes the total count of all functions in profile j. Then + // overlap = sum(min(count_i_1/total_1, count_i_2/total_2)) + float Overlap(const SymbolMap &map) const; + + // Iterates the address count map to calculate the working set of the profile. + // Working set is a map from bucket_num to total number of instructions that + // consumes bucket_num/NUM_GCOV_WORKING_SETS of dynamic instructions. This + // mapping indicates how large is the dynamic hot code region during run time. + // + // To compute working set, the following algorithm is used: + // + // Input: map from instruction to execution count. + // Output: working set. + // 1. compute histogram: map (execution count --> number of instructions) + // 2. traverse the histogram in decending order + // 2.1 calculate accumulated_count. + // 2.2 compute the working set bucket number. + // 2.3 update the working set bucket from last update to calculated bucket + // number. + void ComputeWorkingSets(); + + // Updates the symbol from new binary. + // * reads the module info stored by "-frecord-compilation-info-in-elf". + // * updates each symbol's module info from the debug info stored in + // addr2line. + // * re-groups the module from the updated module info. + void UpdateSymbolMap(const string &binary, const Addr2line *addr2line); + + // Returns a map from start addresses of functions that have been sampled to + // the size of the function. 
+ ::map + GetSampledSymbolStartAddressSizeMap(const set &sampled_addrs) const; + + void Dump() const; + void DumpFuncLevelProfileCompare(const SymbolMap &map) const; + +private: + // Reads from the binary's elf section to build the symbol map. + void BuildSymbolMap(); + + // Reads from address_symbol_map_ and update name_addr_map_. + void BuildNameAddressMap() { + for (const auto &addr_symbol : address_symbol_map_) { + name_addr_map_[addr_symbol.second.first] = addr_symbol.first; + } + } + + NameSymbolMap map_; + NameAliasMap name_alias_map_; + NameAddressMap name_addr_map_; + AddressSymbolMap address_symbol_map_; + const string binary_; + uint64_t base_addr_; +}; + +} // namespace samplepgo + +#endif // AUTOFDO_SYMBOL_MAP_H_ Index: lib/ProfileData/PerfConverter/symbol_map.cpp =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbol_map.cpp @@ -0,0 +1,527 @@ +//=-- symbol_map.cc - Class to represent the symbol map ---------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include +#include +#include + +#include "addr2line.h" +#include "symbol_map.h" +#include "symbolize/elf_reader.h" + +int32 FLAGS_dump_cutoff_percent = 2; +/*DEFINE_int32(dump_cutoff_percent, 2, + "functions that has total count lower than this percentage of " + "the max function count will not show in the dump");*/ + +namespace { +// Returns whether str ends with suffix. 
+inline bool HasSuffixString(const string &str, const string &suffix) { + uint32 len = suffix.size(); + uint32 offset = str.size() - suffix.size(); + return str.substr(offset, len) == suffix; +} + +string GetOriginalName(const char *name) { + const char *split = strchr(name, '.'); + if (split) { + return string(name, split - name); + } else { + return string(name); + } +} + +// Prints some blank space for indentation. +void Identation(int ident) { + for (int i = 0; i < ident; i++) { + printf(" "); + } +} + +void PrintSourceLocation(uint32 start_line, uint32 offset, int ident) { + Identation(ident); + if (offset & 0xffff) { + printf("%u.%u: ", (offset >> 16) + start_line, offset & 0xffff); + } else { + printf("%u: ", (offset >> 16) + start_line); + } +} +} // namespace + +namespace samplepgo { +ProfileInfo &ProfileInfo::operator+=(const ProfileInfo &s) { + count += s.count; + num_inst += s.num_inst; + for (const auto &target_count : s.target_map) { + target_map[target_count.first] += target_count.second; + } + return *this; +} + +struct TargetCountCompare { + bool operator()(const TargetCountPair &t1, const TargetCountPair &t2) const { + if (t1.second != t2.second) { + return t1.second > t2.second; + } else { + return t1.first > t2.first; + } + } +}; + +void GetSortedTargetCountPairs(const CallTargetCountMap &call_target_count_map, + TargetCountPairs *target_counts) { + for (const auto &name_count : call_target_count_map) { + target_counts->push_back(name_count); + } + sort(target_counts->begin(), target_counts->end(), TargetCountCompare()); +) +} + +SymbolMap::~SymbolMap() { + // Different keys (function names) may map to the same symbol. + // In order to prevent double free, we first merge all symbols + // into a set, then remove every symbol from the set. 
+ set delete_set; + for (NameSymbolMap::iterator iter = map_.begin(); iter != map_.end(); + ++iter) { + delete_set.insert(iter->second); + } + for (const auto &symbol : delete_set) { + delete symbol; + } +} + +Symbol::~Symbol() { + for (auto &callsite_symbol : callsites) { + delete callsite_symbol.second; + } +} + +void Symbol::Merge(const Symbol *other) { + total_count += other->total_count; + head_count += other->head_count; + if (info.file_name == NULL) { + info.file_name = other->info.file_name; + info.dir_name = other->info.dir_name; + } + for (const auto &pos_count : other->pos_counts) + pos_counts[pos_count.first] += pos_count.second; + // Traverses all callsite, recursively Merge the callee symbol. + for (const auto &callsite_symbol : other->callsites) { + pair ret = + callsites.insert(CallsiteMap::value_type(callsite_symbol.first, NULL)); + // If the callsite does not exist in the current symbol, create a + // new callee symbol with the clone's function name. + if (ret.second) { + ret.first->second = new Symbol(); + ret.first->second->info.func_name = ret.first->first.second; + } + ret.first->second->Merge(callsite_symbol.second); + } +} + +void SymbolMap::Merge() { + for (auto &name_symbol : map_) { + string name = GetOriginalName(name_symbol.first.c_str()); + pair ret = + map_.insert(NameSymbolMap::value_type(name, NULL)); + if (ret.second) { + ret.first->second = new Symbol(); + ret.first->second->info.func_name = ret.first->first.c_str(); + } + if (ret.first->second != name_symbol.second) { + ret.first->second->Merge(name_symbol.second); + name_symbol.second->total_count = 0; + name_symbol.second->head_count = 0; + } + } +} + +void SymbolMap::AddSymbol(const string &name) { + pair ret = + map_.insert(NameSymbolMap::value_type(name, NULL)); + if (ret.second) { + ret.first->second = new Symbol(ret.first->first.c_str(), NULL, NULL, 0); + NameAliasMap::const_iterator alias_iter = name_alias_map_.find(name); + if (alias_iter != name_alias_map_.end()) { + 
for (const auto &name : alias_iter->second) { + map_[name] = ret.first->second; + } + } + } +} + +uint64 SymbolMap::TotalCount() const { + uint64 total_count = 0; + for (const auto &name_symbol : map_) { + total_count += name_symbol.second->total_count; + } + return total_count; +} + +bool SymbolMap::GetSymbolInfoByAddr(uint64 addr, const string **name, + uint64 *start_addr, + uint64 *end_addr) const { + AddressSymbolMap::const_iterator ret = address_symbol_map_.upper_bound(addr); + if (ret == address_symbol_map_.begin()) { + return false; + } + ret--; + if (addr >= ret->first && addr < ret->first + ret->second.second) { + if (name) { + *name = &ret->second.first; + } + if (start_addr) { + *start_addr = ret->first; + } + if (end_addr) { + *end_addr = ret->first + ret->second.second; + } + return true; + } else { + return false; + } +} + +const string *SymbolMap::GetSymbolNameByStartAddr(uint64 addr) const { + AddressSymbolMap::const_iterator ret = address_symbol_map_.find(addr); + if (ret == address_symbol_map_.end()) { + return NULL; + } + return &ret->second.first; +} + +class SymbolReader : public autofdo::ElfReader::SymbolSink { +public: + explicit SymbolReader(NameAliasMap *name_alias_map, + AddressSymbolMap *address_symbol_map) + : name_alias_map_(name_alias_map), + address_symbol_map_(address_symbol_map) {} + virtual void AddSymbol(const char *name, uint64 address, uint64 size) { + if (size == 0) { + return; + } + pair ret = address_symbol_map_->insert( + make_pair(address, make_pair(string(name), size))); + if (!ret.second) { + (*name_alias_map_)[ret.first->second.first].insert(name); + } + } + virtual ~SymbolReader() {} + +private: + NameAliasMap *name_alias_map_; + AddressSymbolMap *address_symbol_map_; + + DISALLOW_COPY_AND_ASSIGN(SymbolReader); +}; + +void SymbolMap::BuildSymbolMap() { + autofdo::ElfReader elf_reader(binary_); + base_addr_ = elf_reader.VaddrOfFirstLoadSegment(); + SymbolReader symbol_reader(&name_alias_map_, &address_symbol_map_); + + 
elf_reader.VisitSymbols(&symbol_reader); +} + +void SymbolMap::UpdateSymbolMap(const string &binary, + const Addr2line *addr2line) { + SymbolMap new_map(binary); + + for (auto iter = map_.begin(); iter != map_.end(); ++iter) { + uint64 addr = new_map.GetSymbolStartAddr(iter->first); + if (addr == 0) { + continue; + } + SourceStack stack; + addr2line->GetInlineStack(addr, &stack); + if (stack.size() != 0) { + iter->second->info.file_name = stack[stack.size() - 1].file_name; + iter->second->info.dir_name = stack[stack.size() - 1].dir_name; + } + } +} + +string Symbol::ModuleName() const { + // This is a special case in Google3, though tcmalloc.cc has a suffix of .cc, + // it's actually no a module, but included by tcmalloc_or_debug.cc, which is + // a pure wrapper. Thus when a function is found to belong to module + // tcmalloc.cc, it should be reattributed to the wrapper module. + if (info.RelativePath() == "./tcmalloc/tcmalloc.cc") { + return "tcmalloc/tcmalloc_or_debug.cc"; + } else { + return info.RelativePath(); + } +} + +bool Symbol::IsFromHeader() const { + if (HasSuffixString(ModuleName(), ".c") || + HasSuffixString(ModuleName(), ".cc") || + HasSuffixString(ModuleName(), ".C") || + HasSuffixString(ModuleName(), ".cpp")) { + return false; + } else if (HasSuffixString(ModuleName(), ".h")) { + return true; + } else { + LOG(WARNING) << ModuleName() << " has unknown suffix."; + // If suffix is unknown, we think it is from header so that the module + // will not be considered in module grouping. 
+ return true; + } +} + +void SymbolMap::AddSymbolEntryCount(const string &symbol_name, uint64 count) { + Symbol *symbol = map_.find(symbol_name)->second; + symbol->head_count = max(symbol->head_count, count); +} + +Symbol *SymbolMap::TraverseInlineStack(const string &symbol_name, + const SourceStack &src, uint64 count) { + Symbol *symbol = map_.find(symbol_name)->second; + symbol->total_count += count; + const SourceInfo &info = src[src.size() - 1]; + if (symbol->info.file_name == NULL && info.file_name != NULL) { + symbol->info.file_name = info.file_name; + symbol->info.dir_name = info.dir_name; + } + for (int i = src.size() - 1; i > 0; i--) { + pair ret = + symbol->callsites.insert(CallsiteMap::value_type( + Callsite(src[i].Offset(), src[i - 1].func_name), NULL)); + if (ret.second) { + ret.first->second = + new Symbol(src[i - 1].func_name, src[i - 1].dir_name, + src[i - 1].file_name, src[i - 1].start_line); + } + symbol = ret.first->second; + symbol->total_count += count; + } + return symbol; +} + +void SymbolMap::AddSourceCount(const string &symbol_name, + const SourceStack &src, uint64 count, + uint64 num_inst, Operation op) { + if (src.size() == 0 || src[0].Malformed()) { + return; + } + Symbol *symbol = TraverseInlineStack(symbol_name, src, count); + if (op == MAX) { + if (count > symbol->pos_counts[src[0].Offset()].count) { + symbol->pos_counts[src[0].Offset()].count = count; + } + } else if (op == SUM) { + symbol->pos_counts[src[0].Offset()].count += count; + } else { + LOG(FATAL) << "op not supported."; + } + symbol->pos_counts[src[0].Offset()].num_inst += num_inst; +} + +void SymbolMap::AddIndirectCallTarget(const string &symbol_name, + const SourceStack &src, + const string &target, uint64 count) { + Symbol *symbol = TraverseInlineStack(symbol_name, src, 0); + symbol->pos_counts[src[0].Offset()] + .target_map[GetOriginalName(target.c_str())] = count; +} + +struct CallsiteLessThan { + bool operator()(const Callsite &c1, const Callsite &c2) const { + if 
(c1.first != c2.first) + return c1.first < c2.first; + if ((c1.second == NULL || c2.second == NULL)) + return c1.second == NULL; + return strcmp(c1.second, c2.second) < 0; + } +}; + +void Symbol::Dump(int ident) const { + if (ident == 0) { + printf("%s total:%llu head:%llu\n", info.func_name, + static_cast(total_count), + static_cast(head_count)); + } else { + printf("%s total:%llu\n", info.func_name, + static_cast(total_count)); + } + vector positions; + for (const auto &pos_count : pos_counts) + positions.push_back(pos_count.first); + sort(positions.begin(), positions.end()); + for (const auto &pos : positions) { + PositionCountMap::const_iterator ret = pos_counts.find(pos); + DCHECK(ret != pos_counts.end()); + PrintSourceLocation(info.start_line, pos, ident + 2); + printf("%llu", static_cast(ret->second.count)); + TargetCountPairs target_count_pairs; + GetSortedTargetCountPairs(ret->second.target_map, &target_count_pairs); + for (const auto &target_count : target_count_pairs) { + printf(" %s:%llu", target_count.first.c_str(), + static_cast(target_count.second)); + } + printf("\n"); + } + vector calls; + for (const auto &pos_symbol : callsites) { + calls.push_back(pos_symbol.first); + } + sort(calls.begin(), calls.end(), CallsiteLessThan()); + for (const auto &callsite : calls) { + PrintSourceLocation(info.start_line, callsite.first, ident + 2); + callsites.find(callsite)->second->Dump(ident + 2); + } +} + +void SymbolMap::Dump() const { + std::map> count_names_map; + for (const auto &name_symbol : map_) { + if (name_symbol.second->total_count > 0) { + count_names_map[~name_symbol.second->total_count].push_back( + name_symbol.first); + } + } + for (const auto &count_names : count_names_map) { + for (const auto &name : count_names.second) { + Symbol *symbol = map_.find(name)->second; + symbol->Dump(0); + } + } +} + +float SymbolMap::Overlap(const SymbolMap &map) const { + std::map> overlap_map; + + // Prepare for overlap_map + uint64 total_1 = 0; + uint64 total_2 
= 0; + for (const auto &name_symbol : map_) { + total_1 += name_symbol.second->total_count; + overlap_map[name_symbol.first].first = name_symbol.second->total_count; + overlap_map[name_symbol.first].second = 0; + } + for (const auto &name_symbol : map.map()) { + if (overlap_map.find(name_symbol.first) == overlap_map.end()) { + overlap_map[name_symbol.first].first = 0; + } + total_2 += name_symbol.second->total_count; + overlap_map[name_symbol.first].second = name_symbol.second->total_count; + } + + if (total_1 == 0 || total_2 == 0) { + return 0.0; + } + + // Calculate the overlap + float overlap = 0.0; + for (const auto &name_counts : overlap_map) { + overlap += + std::min(static_cast(name_counts.second.first) / total_1, + static_cast(name_counts.second.second) / total_2); + } + return overlap; +} + +void SymbolMap::DumpFuncLevelProfileCompare(const SymbolMap &map) const { + uint64 max_1 = 0; + uint64 max_2 = 0; + + // Calculate the max of the two maps + for (const auto &name_symbol : map_) { + max_1 = std::max(name_symbol.second->total_count, max_1); + } + for (const auto &name_symbol : map.map()) { + max_2 = std::max(name_symbol.second->total_count, max_2); + } + + // Sort map_1 + std::map> count_names_map; + for (const auto &name_symbol : map_) { + if (name_symbol.second->total_count > 0) { + count_names_map[name_symbol.second->total_count].push_back( + name_symbol.first); + } + } + // Dump hot functions in map_1 + for (auto count_names_iter = count_names_map.rbegin(); + count_names_iter != count_names_map.rend(); ++count_names_iter) { + for (const auto &name : count_names_iter->second) { + Symbol *symbol = map_.find(name)->second; + if (symbol->total_count * 100 < max_1 * FLAGS_dump_cutoff_percent) { + break; + } + + const auto &iter = map.map().find(name); + uint64 compare_count = 0; + if (iter != map.map().end()) { + compare_count = iter->second->total_count; + } + printf("%llu%% %llu%% %s\n", + static_cast(symbol->total_count * 100 / max_1), + 
static_cast(compare_count * 100 / max_2), + name.c_str()); + } + } + + // Sort map_2 + count_names_map.clear(); + for (const auto &name_symbol : map.map()) { + if (name_symbol.second->total_count > 0) { + count_names_map[name_symbol.second->total_count].push_back( + name_symbol.first); + } + } + // Dump hot functions in map_2 that was not caught. + for (auto count_names_iter = count_names_map.rbegin(); + count_names_iter != count_names_map.rend(); ++count_names_iter) { + for (const auto &name : count_names_iter->second) { + Symbol *symbol = map.map().find(name)->second; + if (symbol->total_count * 100 < max_2 * FLAGS_dump_cutoff_percent) { + break; + } + + const auto &iter = map_.find(name); + uint64 compare_count = 0; + if (iter != map.map().end()) { + compare_count = iter->second->total_count; + if (compare_count * 100 >= max_1 * FLAGS_dump_cutoff_percent) { + continue; + } + } + printf("%llu%% %llu%% %s\n", + static_cast(compare_count * 100 / max_1), + static_cast(symbol->total_count * 100 / max_2), + name.c_str()); + } + } +} + +typedef map Histogram; + +::map SymbolMap::GetSampledSymbolStartAddressSizeMap( + const set &sampled_addrs) const { + // We depend on the fact that sampled_addrs is an ordered set. 
+ ::map ret; + uint64 next_start_addr = 0; + for (const auto &addr : sampled_addrs) { + uint64 adjusted_addr = addr + base_addr_; + if (adjusted_addr < next_start_addr) { + continue; + } + + AddressSymbolMap::const_iterator iter = + address_symbol_map_.upper_bound(adjusted_addr); + if (iter == address_symbol_map_.begin()) { + continue; + } + iter--; + ret.insert(make_pair(iter->first, iter->second.second)); + next_start_addr = iter->first + iter->second.second; + } + return ret; +} +} // namespace samplepgo Index: lib/ProfileData/PerfConverter/symbolize/addr2line_inlinestack.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/addr2line_inlinestack.h @@ -0,0 +1,243 @@ +//=-- addr2line_inlinestack.h -----------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_ADDR2LINE_INLINESTACK_H_ +#define AUTOFDO_SYMBOLIZE_ADDR2LINE_INLINESTACK_H_ + +#include +#include +#include +#include + +#include "symbolize/dwarf2enums.h" +#include "symbolize/dwarf2reader.h" +#include "symbolize/dwarf3ranges.h" +#include "symbolize/nonoverlapping_range_map.h" + +namespace autofdo { + +class ByteReader; + +// This class tracks information about a DWARF subprogram as it +// pertains to computing line information for inlined functions. In +// particular, for each subprogram (i.e., function) the class tracks +// whether the function is an inlined copy. If so, the class stores +// the offset in the debug section of the non-inlined copy of the +// function, into what function it is inlined (parent_), the line +// number of the call site, and the file name of the call site. If it +// is not an inlined function, the function name is stored. 
In both +// cases, the address ranges occupied by the Subprogram are stored. +class SubprogramInfo { + public: + SubprogramInfo(int cu_index, uint64 offset, const SubprogramInfo *parent, + bool inlined) + : cu_index_(cu_index), offset_(offset), parent_(parent), name_(), + address_ranges_(0), inlined_(inlined), comp_directory_(NULL), + callsite_directory_(NULL), callsite_filename_(NULL), callsite_line_(0), + callsite_discr_(0), abstract_origin_(0), specification_(0), + used_(false) { } + + int cu_index() const { return cu_index_; } + + uint64 offset() const { return offset_; } + const SubprogramInfo *parent() const { return parent_; } + + void set_name(const char *name) { name_.assign(name); } + const string& name() const { return name_; } + + // Address ranges are specified in the DWARF file as a reference to + // a range list or a pair of DIEs specifying a single range. + // SwapAddressRanges is used for the first case, while the + // SetSingleton* methods are used for the latter. + void SwapAddressRanges(AddressRangeList::RangeList *ranges); + void SetSingletonRangeLow(uint64 addr); + void SetSingletonRangeHigh(uint64 addr, bool is_offset); + const AddressRangeList::RangeList *address_ranges() const { + return &address_ranges_; + } + + bool inlined() const { return inlined_; } + + void set_comp_directory(const char *dir) { + comp_directory_ = dir; + } + + const char *comp_directory() const {return comp_directory_;} + + void set_callsite_directory(const char *dir) { + callsite_directory_ = dir; + } + const char *callsite_directory() const { + return callsite_directory_; + } + + void set_callsite_filename(const char *file) { + callsite_filename_ = file; + } + const char *callsite_filename() const { + return callsite_filename_; + } + + // Returns a string representing the filename of this callsite. + // + // Args: + // basenames_only: just the filename + // with_comp_dir: prepend the compilation dir, if we have it. 
+ string CallsiteFilename(bool basenames_only, bool with_comp_dir) const; + + void set_callsite_line(uint32 line) { callsite_line_ = line; } + uint32 callsite_line() const { return callsite_line_; } + + void set_callsite_discr(uint32 discr) { callsite_discr_ = discr; } + uint32 callsite_discr() const { return callsite_discr_; } + + // The abstract origin refers to the details of an out-of-line or + // inline concrete instance of an inline function. See + // http://www.dwarfstd.org for more details. + void set_abstract_origin(uint64 offset) { abstract_origin_ = offset; } + uint64 abstract_origin() const { return abstract_origin_; } + + void set_specification(uint64 offset) { specification_ = offset; } + uint64 specification() const { return specification_; } + + void set_used() { used_ = true; } + bool used() const { return used_; } + + private: + int cu_index_; + uint64 offset_; + const SubprogramInfo *parent_; + // The name may come from a .dwo file's string table, which will be + // destroyed before we're done, so we need to make a copy. + string name_; + AddressRangeList::RangeList address_ranges_; + bool inlined_; + const char *comp_directory_; // working dir of compilation. + const char *callsite_directory_; + const char *callsite_filename_; + uint32 callsite_line_; + uint32 callsite_discr_; + uint64 abstract_origin_; + uint64 specification_; + bool used_; + DISALLOW_COPY_AND_ASSIGN(SubprogramInfo); +}; + +// This class implements the callback interface used for reading DWARF +// debug information. It stores information about all the observed +// subprograms. The full set of subprograms can be retrieved, or the +// handler can be queried by address after all the debug information +// is read. Between compilation units the set_filename method should +// be called to point to the correct filename information for the +// compilation unit. 
+class InlineStackHandler: public Dwarf2Handler { + public: + InlineStackHandler( + AddressRangeList *address_ranges, + const SectionMap& sections, + ByteReader *reader) + : directory_names_(NULL), file_names_(NULL), line_handler_(NULL), + sections_(sections), reader_(reader), address_ranges_(address_ranges), + cu_index_(-1), subprograms_by_offset_maps_(), + compilation_unit_comp_dir_(), sampled_functions_(NULL) + { } + + InlineStackHandler( + AddressRangeList *address_ranges, + const SectionMap& sections, + ByteReader *reader, + const map *sampled_functions) + : directory_names_(NULL), file_names_(NULL), line_handler_(NULL), + sections_(sections), reader_(reader), address_ranges_(address_ranges), + cu_index_(-1), subprograms_by_offset_maps_(), + compilation_unit_comp_dir_(), sampled_functions_(sampled_functions) + { } + + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version); + + virtual bool StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs); + + virtual void EndDIE(uint64 offset); + + virtual void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char *data); + + virtual void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + + void set_directory_names( + const DirectoryVector *directory_names) { + directory_names_ = directory_names; + } + + void set_file_names(const FileVector *file_names) { + file_names_ = file_names; + } + + void set_line_handler(LineInfoHandler *handler) { + line_handler_ = handler; + } + + const SubprogramInfo *GetSubprogramForAddress(uint64 address); + + const SubprogramInfo *GetDeclaration(const SubprogramInfo *subprog) const; + + const SubprogramInfo *GetAbstractOrigin(const SubprogramInfo *subprog) const; + + // Puts the start addresses of all inlined subprograms into the given set. 
+ void GetSubprogramAddresses(set *addrs); + + // Cleans up memory consumed by subprograms that are not used. + void CleanupUnusedSubprograms(); + + void PopulateSubprogramsByAddress(); + + ~InlineStackHandler(); + + private: + typedef map SubprogramsByOffsetMap; + + void FindBadSubprograms(set *bad_subprograms); + + const DirectoryVector *directory_names_; + const FileVector *file_names_; + LineInfoHandler *line_handler_; + const SectionMap& sections_; + ByteReader *reader_; + AddressRangeList *address_ranges_; + vector subprogram_stack_; + vector die_stack_; + // We keep a separate map from offset to SubprogramInfo for each CU, + // because when reading .dwo or .dwp files, the offsets are relative + // to the beginning of the debug info for that CU. + int cu_index_; + vector subprograms_by_offset_maps_; + vector subprogram_insert_order_; + NonOverlappingRangeMap subprograms_by_address_; + uint64 compilation_unit_offset_; + uint64 compilation_unit_base_; + // The comp dir name may come from a .dwo file's string table, which + // will be destroyed before we're done, so we need to copy it for + // each compilation unit. We need to keep a vector of all the + // directories that we've seen, because SubprogramInfo keeps + // StringPiece objects pointing to these copies. + vector compilation_unit_comp_dir_; + const map *sampled_functions_; + DISALLOW_COPY_AND_ASSIGN(InlineStackHandler); +}; + +} // namespace autofdo + +#endif // AUTOFDO_SYMBOLIZE_ADDR2LINE_INLINESTACK_H_ Index: lib/ProfileData/PerfConverter/symbolize/addr2line_inlinestack.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/addr2line_inlinestack.cc @@ -0,0 +1,490 @@ +//=-- addr2line_inlinestack.cc ----------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include "symbolize/addr2line_inlinestack.h" + +#include + +#include "symbolize/bytereader.h" + +namespace autofdo { + +void SubprogramInfo::SwapAddressRanges(AddressRangeList::RangeList *ranges) { + address_ranges_.swap(*ranges); +} + +void SubprogramInfo::SetSingletonRangeLow(uint64 addr) { + if (address_ranges_.empty()) { + address_ranges_.push_back(make_pair(addr, 0ULL)); + } else { + CHECK_EQ(1, address_ranges_.size()); + address_ranges_[0].first = addr; + } +} + +void SubprogramInfo::SetSingletonRangeHigh(uint64 addr, bool is_offset) { + if (address_ranges_.empty()) { + address_ranges_.push_back(make_pair(0ULL, addr)); + } else { + CHECK_EQ(1, address_ranges_.size()); + if (is_offset) + address_ranges_[0].second = address_ranges_[0].first + addr; + else + address_ranges_[0].second = addr; + } +} + +bool InlineStackHandler::StartCompilationUnit(uint64 offset, + uint8 /*address_size*/, + uint8 /*offset_size*/, + uint64 /*cu_length*/, + uint8 /*dwarf_version*/) { + CHECK(subprogram_stack_.empty()); + compilation_unit_offset_ = offset; + compilation_unit_base_ = 0; + ++cu_index_; + subprograms_by_offset_maps_.push_back(new SubprogramsByOffsetMap); + CHECK(subprograms_by_offset_maps_.size() == (unsigned) cu_index_ + 1); + return true; +} + +void InlineStackHandler::CleanupUnusedSubprograms() { + SubprogramsByOffsetMap* subprograms_by_offset = + subprograms_by_offset_maps_.back(); + vector worklist; + for (const auto &offset_subprogram : *subprograms_by_offset) { + if (offset_subprogram.second->used()) { + worklist.push_back(offset_subprogram.second); + } + } + + while (worklist.size()) { + const SubprogramInfo *info = worklist.back(); + worklist.pop_back(); + uint64 specification = info->specification(); + uint64 abstract_origin = info->abstract_origin(); + if (specification) { + SubprogramInfo *info = + subprograms_by_offset->find(specification)->second; + if (!info->used()) { 
+ info->set_used(); + worklist.push_back(info); + } + } + if (abstract_origin) { + SubprogramInfo *info = + subprograms_by_offset->find(abstract_origin)->second; + if (!info->used()) { + info->set_used(); + worklist.push_back(info); + } + } + } + + // Moves the actually used subprograms into a new map so that we can remove + // the entire original map to free memory. + SubprogramsByOffsetMap* new_map = new SubprogramsByOffsetMap(); + for (const auto &offset_subprogram : *subprograms_by_offset) { + if (offset_subprogram.second->used()) { + new_map->insert(offset_subprogram); + } else { + delete offset_subprogram.second; + } + } + delete subprograms_by_offset; + subprograms_by_offset_maps_.back() = new_map; +} + +bool InlineStackHandler::StartDIE(uint64 offset, + enum DwarfTag tag, + const AttributeList& attrs) { + die_stack_.push_back(tag); + + switch (tag) { + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: { + bool inlined = (tag == DW_TAG_inlined_subroutine); + SubprogramInfo *parent = + subprogram_stack_.empty() ? NULL : subprogram_stack_.back(); + SubprogramInfo *child = + new SubprogramInfo(cu_index_, offset, parent, inlined); + if (!compilation_unit_comp_dir_.empty()) + child->set_comp_directory(compilation_unit_comp_dir_.back()->c_str()); + SubprogramsByOffsetMap* subprograms_by_offset = + subprograms_by_offset_maps_.back(); + subprograms_by_offset->insert(make_pair(offset, child)); + subprogram_stack_.push_back(child); + return true; + } + case DW_TAG_compile_unit: + return true; + default: + return false; + } +} + +void InlineStackHandler::EndDIE(uint64 offset) { + DwarfTag die = die_stack_.back(); + die_stack_.pop_back(); + if (die == DW_TAG_subprogram || + die == DW_TAG_inlined_subroutine) { + // If the top level subprogram is used, we mark all subprograms in + // the subprogram_stack_ as used. 
+ if (subprogram_stack_.front()->used()) { + subprogram_stack_.back()->set_used(); + } + if (!sampled_functions_ || subprogram_stack_.front()->used()) { + subprogram_insert_order_.push_back(subprogram_stack_.back()); + } + subprogram_stack_.pop_back(); + } + if (die == DW_TAG_compile_unit && sampled_functions_ != NULL) { + CleanupUnusedSubprograms(); + } +} + +void InlineStackHandler::ProcessAttributeString( + uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char *data) { + if (attr == DW_AT_comp_dir) { + compilation_unit_comp_dir_.emplace_back(new string(data)); + } + + if (!subprogram_stack_.empty()) { + // Use the mangled name if it exists, otherwise use the demangled name + if (attr == DW_AT_MIPS_linkage_name + || attr == DW_AT_linkage_name) { + subprogram_stack_.back()->set_name(data); + } else if (attr == DW_AT_name && + subprogram_stack_.back()->name().empty()) { + subprogram_stack_.back()->set_name(data); + } + } +} + +void InlineStackHandler::ProcessAttributeUnsigned( + uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + if (!subprogram_stack_.empty()) { + switch (attr) { + case DW_AT_call_file: { + if (data == 0 || data >= file_names_->size()) { + LOG(WARNING) << "unexpected reference to file_num " << data; + break; + } + + if (file_names_ != NULL) { + const FileVector::value_type &file = + (*file_names_)[data]; + if (directory_names_ != NULL) { + if ((unsigned)file.first < directory_names_->size()) { + const char *dir = (*directory_names_)[file.first]; + subprogram_stack_.back()->set_callsite_directory(dir); + } else { + LOG(WARNING) << "unexpected reference to dir_num " << file.first; + } + } + subprogram_stack_.back()->set_callsite_filename(file.second); + } + break; + } + case DW_AT_call_line: + CHECK(form == DW_FORM_data1 || + form == DW_FORM_data2 || + form == DW_FORM_data4); + subprogram_stack_.back()->set_callsite_line(data); + break; + case DW_AT_GNU_discriminator: + CHECK(form == 
DW_FORM_data1 || + form == DW_FORM_data2 || + form == DW_FORM_data4); + subprogram_stack_.back()->set_callsite_discr(data); + break; + case DW_AT_abstract_origin: + CHECK(form == DW_FORM_ref4); + subprogram_stack_.back()->set_abstract_origin( + compilation_unit_offset_ + data); + break; + case DW_AT_specification: + CHECK(form == DW_FORM_ref4); + subprogram_stack_.back()->set_specification( + compilation_unit_offset_ + data); + break; + case DW_AT_low_pc: + subprogram_stack_.back()->SetSingletonRangeLow(data); + // If a symbol's start address is in sampled_functions, we will + // mark the top level subprogram of this symbol as used. + if (sampled_functions_ != NULL && + subprogram_stack_.size() == 1 && + sampled_functions_->find(data) != sampled_functions_->end()) { + subprogram_stack_.front()->set_used(); + } + break; + case DW_AT_high_pc: + subprogram_stack_.back()->SetSingletonRangeHigh( + data, form != DW_FORM_addr); + break; + case DW_AT_ranges: { + CHECK_EQ(0, subprogram_stack_.back()->address_ranges()->size()); + AddressRangeList::RangeList ranges; + address_ranges_->ReadRangeList(data, compilation_unit_base_, &ranges); + subprogram_stack_.back()->SwapAddressRanges(&ranges); + if (sampled_functions_ != NULL && + subprogram_stack_.size() == 1 && + sampled_functions_->find(AddressRangeList::RangesMin(&ranges)) + != sampled_functions_->end()) { + subprogram_stack_.front()->set_used(); + } + break; + } + case DW_AT_decl_line: { + if (die_stack_.back() == DW_TAG_subprogram) { + subprogram_stack_.back()->set_callsite_line(data); + } + break; + } + default: + break; + } + } else if (die_stack_.back() == DW_TAG_compile_unit) { + // The subprogram stack is empty. This information is therefore + // describing the compilation unit. 
+ switch (attr) { + case DW_AT_low_pc: + compilation_unit_base_ = data; + break; + case DW_AT_stmt_list: + { + SectionMap::const_iterator iter = sections_.find(".debug_line"); + CHECK(iter != sections_.end()) << "unable to find .debug_line " + "in section map"; + LineInfo lireader(iter->second.first + data, + iter->second.second - data, + reader_, line_handler_); + lireader.Start(); + } + break; + default: + break; + } + } +} + +void InlineStackHandler::FindBadSubprograms( + set *bad_subprograms) { + // Search for bad DIEs. The debug information often contains + // multiple entries for the same function. However, only one copy + // of the debug information corresponds to the actual emitted code. + // The others may be correct (if they got compiled identically) or + // they may be wrong. This code filters out bad debug information + // using two approaches: + // + // 1) If a non-inlined function's address ranges contain the + // starting address of other non-inlined functions, then it is + // bad. This approach is safe because the starting address for + // functions is accurate across all the DIEs. + // + // 2) If multiple functions start at the same address after pruning + // using phase one, then pick the largest one. This heuristic is + // based on the assumption that if the largest one were bad, + // then it would have conflicted with another function and would have + // been pruned in step 1. + + // Find the start addresses for each non-inlined subprogram. + set start_addresses; + for (vector::iterator subprogs = + subprogram_insert_order_.begin(); + subprogs != subprogram_insert_order_.end(); + ++subprogs) { + SubprogramInfo *subprog = *subprogs; + if (subprog->inlined()) + continue; + + uint64 start_address = + AddressRangeList::RangesMin(subprog->address_ranges()); + start_addresses.insert(start_address); + } + + // Find bad non-inlined subprograms according to rule (1) above. 
+ for (vector::iterator subprogs = + subprogram_insert_order_.begin(); + subprogs != subprogram_insert_order_.end(); + ++subprogs) { + SubprogramInfo *subprog = *subprogs; + if (subprog->inlined()) + continue; + + typedef AddressRangeList::RangeList RangeList; + const RangeList *ranges = subprog->address_ranges(); + uint64 min_address = AddressRangeList::RangesMin(ranges); + uint64 max_address = AddressRangeList::RangesMax(ranges); + + set::iterator closest_match = + start_addresses.lower_bound(min_address); + + if (closest_match != start_addresses.end() && + (*closest_match) == min_address) + ++closest_match; + + if (closest_match != start_addresses.end() && + (*closest_match) < max_address) + bad_subprograms->insert(subprog); + } + + // Find the bad non-inlined subprograms according to rule (2) above. + map good_subprograms; + for (vector::iterator subprogs = + subprogram_insert_order_.begin(); + subprogs != subprogram_insert_order_.end(); + ++subprogs) { + SubprogramInfo *subprog = *subprogs; + + // Filter out non-inlined subprograms + if (subprog->inlined()) + continue; + + // Filter out subprograms with no range information + if (subprog->address_ranges()->size() == 0) + continue; + + // Filter out bad subprograms + if (bad_subprograms->find(subprog) != bad_subprograms->end()) + continue; + + // See if there is another subprogram at this address + uint64 start_address = AddressRangeList::RangesMin( + subprog->address_ranges()); + map::iterator other = + good_subprograms.find(start_address); + + if (other == good_subprograms.end()) { + // If there isn't, then update the map + good_subprograms[start_address] = subprog; + } else { + // If there is, update the map if this function is bigger + uint64 end_address = AddressRangeList::RangesMax( + subprog->address_ranges()); + uint64 other_end_address = AddressRangeList::RangesMax( + other->second->address_ranges()); + + if (end_address > other_end_address) { + good_subprograms[start_address] = subprog; + 
bad_subprograms->insert(other->second); + } else { + bad_subprograms->insert(subprog); + } + } + } + + // Expand the set of bad subprograms to include inlined subprograms. + // An inlined subprogram is bad if its parent is bad. Since + // subprograms are stored in a leaf-to-parent order in + // subprogram_insert_order_, it suffices to scan the vector + // backwards once. + for (vector::reverse_iterator subprogs = + subprogram_insert_order_.rbegin(); + subprogs != subprogram_insert_order_.rend(); + ++subprogs) { + SubprogramInfo *subprog = *subprogs; + if (bad_subprograms->find(subprog->parent()) != bad_subprograms->end()) { + bad_subprograms->insert(subprog); + } + } +} + +void InlineStackHandler::PopulateSubprogramsByAddress() { + // This variable should no longer be accessed. Let's set it to NULL + // here since this is the first opportunity to do so. + address_ranges_ = NULL; + + set bad_subprograms; + FindBadSubprograms(&bad_subprograms); + + // For the DIEs that are not marked bad, insert them into the + // address based map. 
+ for (vector::iterator subprogs = + subprogram_insert_order_.begin(); + subprogs != subprogram_insert_order_.end(); + ++subprogs) { + SubprogramInfo *subprog = *subprogs; + + if (bad_subprograms.find(subprog) == bad_subprograms.end()) + subprograms_by_address_.InsertRangeList( + *subprog->address_ranges(), subprog); + } + + // Clear this vector to save some memory + subprogram_insert_order_.clear(); +} + +const SubprogramInfo *InlineStackHandler::GetSubprogramForAddress( + uint64 address) { + NonOverlappingRangeMap::ConstIterator iter = + subprograms_by_address_.Find(address); + if (iter != subprograms_by_address_.End()) + return iter->second; + else + return NULL; +} + +const SubprogramInfo *InlineStackHandler::GetDeclaration( + const SubprogramInfo *subprog) const { + const int cu_index = subprog->cu_index(); + const SubprogramInfo *declaration = subprog; + CHECK((unsigned)cu_index < subprograms_by_offset_maps_.size()); + SubprogramsByOffsetMap* subprograms_by_offset = + subprograms_by_offset_maps_[cu_index]; + while (declaration->name().empty() || declaration->callsite_line() == 0) { + uint64 specification = declaration->specification(); + if (specification) { + declaration = subprograms_by_offset->find(specification)->second; + } else { + uint64 abstract_origin = declaration->abstract_origin(); + if (abstract_origin) + declaration = subprograms_by_offset->find(abstract_origin)->second; + else + break; + } + } + return declaration; +} + +const SubprogramInfo *InlineStackHandler::GetAbstractOrigin( + const SubprogramInfo *subprog) const { + const int cu_index = subprog->cu_index(); + CHECK((unsigned)cu_index < subprograms_by_offset_maps_.size()); + SubprogramsByOffsetMap* subprograms_by_offset = + subprograms_by_offset_maps_[cu_index]; + if (subprog->abstract_origin()) + return subprograms_by_offset->find(subprog->abstract_origin())->second; + else + return subprog; +} + +void InlineStackHandler::GetSubprogramAddresses(set *addrs) { + for (auto it = 
subprograms_by_address_.Begin(); + it != subprograms_by_address_.End(); ++it) { + addrs->insert(it->first.first); + } +} + +InlineStackHandler::~InlineStackHandler() { + for (auto map : subprograms_by_offset_maps_) { + for (const auto &addr_subprog : *map) + delete addr_subprog.second; + delete map; + } + for (auto comp_dir : compilation_unit_comp_dir_) + delete comp_dir; +} + +} // namespace autofdo Index: lib/ProfileData/PerfConverter/symbolize/bytereader-inl.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/bytereader-inl.h @@ -0,0 +1,122 @@ +//=-- bytereader-inl.h ------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_BYTEREADER_INL_H__ +#define AUTOFDO_SYMBOLIZE_BYTEREADER_INL_H__ + +#include + +#include "symbolize/bytereader.h" + +namespace autofdo { + +inline uint8 ByteReader::ReadOneByte(const char* buffer) const { + return buffer[0]; +} + +inline uint16 ByteReader::ReadTwoBytes(const char* buffer) const { + const uint16 buffer0 = static_cast(buffer[0]) & 0xff; + const uint16 buffer1 = static_cast(buffer[1]) & 0xff; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8; + } else { + return buffer1 | buffer0 << 8; + } +} + +inline uint64 ByteReader::ReadFourBytes(const char* buffer) const { + const uint32 buffer0 = static_cast(buffer[0]) & 0xff; + const uint32 buffer1 = static_cast(buffer[1]) & 0xff; + const uint32 buffer2 = static_cast(buffer[2]) & 0xff; + const uint32 buffer3 = static_cast(buffer[3]) & 0xff; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; + } else { + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; + } +} + 
+// Read eight bytes from BUFFER, honoring the configured endianness,
+// and return them as an unsigned 64 bit number.
+inline uint64 ByteReader::ReadEightBytes(const char* buffer) const {
+  // Mask each byte with 0xff: 'char' may be signed, and an unmasked
+  // sign-extended byte would corrupt the higher bytes of the result.
+  const uint64 buffer0 = static_cast<uint64>(buffer[0]) & 0xff;
+  const uint64 buffer1 = static_cast<uint64>(buffer[1]) & 0xff;
+  const uint64 buffer2 = static_cast<uint64>(buffer[2]) & 0xff;
+  const uint64 buffer3 = static_cast<uint64>(buffer[3]) & 0xff;
+  const uint64 buffer4 = static_cast<uint64>(buffer[4]) & 0xff;
+  const uint64 buffer5 = static_cast<uint64>(buffer[5]) & 0xff;
+  const uint64 buffer6 = static_cast<uint64>(buffer[6]) & 0xff;
+  const uint64 buffer7 = static_cast<uint64>(buffer[7]) & 0xff;
+  if (endian_ == ENDIANNESS_LITTLE) {
+    return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 |
+           buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56;
+  } else {
+    return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 |
+           buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56;
+  }
+}
+
+// Read an unsigned LEB128 number. Each byte contains 7 bits of
+// information, plus one bit saying whether the number continues or
+// not.
+
+inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer,
+                                             size_t* len) const {
+  uint64 result = 0;
+  size_t num_read = 0;
+  unsigned int shift = 0;
+  unsigned char byte;
+
+  do {
+    byte = *buffer++;
+    num_read++;
+
+    // Low 7 bits are payload; widen to uint64 BEFORE shifting so that
+    // shifts of 32+ bits do not overflow an int.
+    result |= (static_cast<uint64>(byte & 0x7f)) << shift;
+
+    shift += 7;
+  } while (byte & 0x80);  // High bit set => more bytes follow.
+
+  *len = num_read;
+
+  return result;
+}
+
+// Read a signed LEB128 number. These are like regular LEB128
+// numbers, except the last byte may have a sign bit set.
+ +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, + size_t* len) const { + int64 result = 0; + unsigned shift = 0; + size_t num_read = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof (result)) && (byte & 0x40)) + result |= -((static_cast(1)) << shift); + *len = num_read; + return result; +} + +inline uint64 ByteReader::ReadOffset(const char* buffer) const { + CHECK(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64 ByteReader::ReadAddress(const char* buffer) const { + CHECK(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +} // namespace autofdo + +#endif // AUTOFDO_SYMBOLIZE_BYTEREADER_INL_H__ Index: lib/ProfileData/PerfConverter/symbolize/bytereader.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/bytereader.h @@ -0,0 +1,115 @@ +//=-- bytereader.h ----------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_BYTEREADER_H__ +#define AUTOFDO_SYMBOLIZE_BYTEREADER_H__ + +#include + +#include "llvm_port.h" + +namespace autofdo { + +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN +// because it conflicts with a macro +enum Endianness { + ENDIANNESS_BIG, + ENDIANNESS_LITTLE, + ENDIANNESS_NATIVE = (__BYTE_ORDER == __BIG_ENDIAN ? ENDIANNESS_BIG + : ENDIANNESS_LITTLE) +}; + +// Class that knows how to read both big endian and little endian +// numbers, for use in DWARF2/3 reader. +// Takes an endianness argument. +// To read addresses and offsets, SetAddressSize and SetOffsetSize +// must be called first. 
+class ByteReader {
+ public:
+  explicit ByteReader(enum Endianness endian);
+  virtual ~ByteReader();
+
+  // Set the address size to SIZE, which sets up the ReadAddress member
+  // so that it works.
+  void SetAddressSize(uint8 size);
+
+  // Set the offset size to SIZE, which sets up the ReadOffset member
+  // so that it works.
+  void SetOffsetSize(uint8 size);
+
+  // Return the current offset size
+  uint8 OffsetSize() const { return offset_size_; }
+
+  // Return the current address size
+  uint8 AddressSize() const { return address_size_; }
+
+  // Read a single byte from BUFFER and return it as an unsigned 8 bit
+  // number.
+  uint8 ReadOneByte(const char* buffer) const;
+
+  // Read two bytes from BUFFER and return it as an unsigned 16 bit
+  // number.
+  uint16 ReadTwoBytes(const char* buffer) const;
+
+  // Read four bytes from BUFFER and return it as an unsigned 32 bit
+  // number. This function returns a uint64 so that it is compatible
+  // with ReadAddress and ReadOffset. The number it returns will
+  // never be outside the range of an unsigned 32 bit integer.
+  uint64 ReadFourBytes(const char* buffer) const;
+
+  // Read eight bytes from BUFFER and return it as an unsigned 64 bit
+  // number
+  uint64 ReadEightBytes(const char* buffer) const;
+
+  // Read an unsigned LEB128 (Little Endian Base 128) number from
+  // BUFFER and return it as an unsigned 64 bit integer. LEN is set
+  // to the length read. Everybody seems to reinvent LEB128 as a
+  // variable size integer encoding, DWARF has had it for a long time.
+  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
+
+  // Read a signed LEB128 number from BUFFER and return it as an
+  // signed 64 bit integer. LEN is set to the length read.
+  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer. DWARF2/3 define offsets as either 4 or 8 bytes,
+  // generally depending on the amount of DWARF2/3 info present.
+  uint64 ReadOffset(const char* buffer) const;
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer. DWARF2/3 allow addresses to be any size from 0-255
+  // bytes currently. Internally we support 4 and 8 byte addresses,
+  // and will CHECK on anything else.
+  uint64 ReadAddress(const char* buffer) const;
+
+ private:
+  // Function pointer type for our address and offset readers.
+  typedef uint64 (ByteReader::*AddressReader)(const char*) const;
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer. DWARF2/3 define offsets as either 4 or 8 bytes,
+  // generally depending on the amount of DWARF2/3 info present.
+  // This function pointer gets set by SetOffsetSize.
+  AddressReader offset_reader_;
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer. DWARF2/3 allow addresses to be any size from 0-255
+  // bytes currently. Internally we support 4 and 8 byte addresses,
+  // and will CHECK on anything else.
+  // This function pointer gets set by SetAddressSize.
+  AddressReader address_reader_;
+
+  // Byte order applied by the multi-byte Read* members above.
+  Endianness endian_;
+  // Sizes in bytes (4 or 8); 0 until SetAddressSize/SetOffsetSize run.
+  uint8 address_size_;
+  uint8 offset_size_;
+  // Copying would alias the reader state; disallowed by project macro.
+  DISALLOW_EVIL_CONSTRUCTORS(ByteReader);
+};
+
+}  // namespace autofdo
+
+#endif  // AUTOFDO_SYMBOLIZE_BYTEREADER_H__
Index: lib/ProfileData/PerfConverter/symbolize/bytereader.cc
===================================================================
--- /dev/null
+++ lib/ProfileData/PerfConverter/symbolize/bytereader.cc
@@ -0,0 +1,41 @@
+//=-- bytereader.cc ---------------------------------------------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "symbolize/bytereader.h"
+#include "symbolize/bytereader-inl.h"
+
+namespace autofdo {
+
+// Readers start unset (NULL); ReadOffset/ReadAddress CHECK until
+// SetOffsetSize/SetAddressSize install them.
+ByteReader::ByteReader(enum Endianness endian)
+    :offset_reader_(NULL), address_reader_(NULL), endian_(endian),
+     address_size_(0), offset_size_(0)
+{ }
+
+ByteReader::~ByteReader() { }
+
+// Record the DWARF offset width and bind the matching fixed-width
+// reader; only 4- and 8-byte offsets are supported (CHECK otherwise).
+void ByteReader::SetOffsetSize(uint8 size) {
+  offset_size_ = size;
+  CHECK(size == 4 || size == 8);
+  if (size == 4) {
+    this->offset_reader_ = &ByteReader::ReadFourBytes;
+  } else {
+    this->offset_reader_ = &ByteReader::ReadEightBytes;
+  }
+}
+
+// Record the target address width and bind the matching fixed-width
+// reader; only 4- and 8-byte addresses are supported (CHECK otherwise).
+void ByteReader::SetAddressSize(uint8 size) {
+  address_size_ = size;
+  CHECK(size == 4 || size == 8);
+  if (size == 4) {
+    this->address_reader_ = &ByteReader::ReadFourBytes;
+  } else {
+    this->address_reader_ = &ByteReader::ReadEightBytes;
+  }
+}
+
+}  // namespace autofdo
Index: lib/ProfileData/PerfConverter/symbolize/dwarf2enums.h
===================================================================
--- /dev/null
+++ lib/ProfileData/PerfConverter/symbolize/dwarf2enums.h
@@ -0,0 +1,580 @@
+//=-- dwarf2enums.h ---------------------------------------------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef AUTOFDO_SYMBOLIZE_DWARF2ENUMS_H__
+#define AUTOFDO_SYMBOLIZE_DWARF2ENUMS_H__
+
+namespace autofdo {
+
+// These enums do not follow the google3 style only because they are
+// known universally (specs, other implementations) by the names in
+// exactly this capitalization.
+// Tag names and codes.
+enum DwarfTag { + DW_TAG_padding = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_param = 0x30, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + // DWARF 3. + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + DW_TAG_condition = 0x3f, + DW_TAG_shared_type = 0x40, + // DWARF 4. + DW_TAG_type_unit = 0x41, + DW_TAG_rvalue_reference_type = 0x42, + DW_TAG_template_alias = 0x43, + DW_TAG_lo_user = 0x4080, + DW_TAG_hi_user = 0xffff, + // SGI/MIPS Extensions. 
+ DW_TAG_MIPS_loop = 0x4081, + // HP extensions. See: + // ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz + DW_TAG_HP_array_descriptor = 0x4090, + // GNU extensions. + DW_TAG_format_label = 0x4101, // For FORTRAN 77 and Fortran 90. + DW_TAG_function_template = 0x4102, // For C++. + DW_TAG_class_template = 0x4103, // For C++. + DW_TAG_GNU_BINCL = 0x4104, + DW_TAG_GNU_EINCL = 0x4105, + // http://gcc.gnu.org/wiki/TemplateParmsDwarf + DW_TAG_GNU_template_template_param = 0x4106, + DW_TAG_GNU_template_parameter_pack = 0x4107, + DW_TAG_GNU_formal_parameter_pack = 0x4108, + // http://www.dwarfstd.org/ShowIssue.php?issue=100909.2&type=open + DW_TAG_GNU_call_site = 0x4109, + DW_TAG_GNU_call_site_parameter = 0x410a, + // Apple extensions. + DW_TAG_APPLE_property = 0x4200, + // Extensions for UPC. See: http://upc.gwu.edu/~upc. + DW_TAG_upc_shared_type = 0x8765, + DW_TAG_upc_strict_type = 0x8766, + DW_TAG_upc_relaxed_type = 0x8767, + // PGI (STMicroelectronics) extensions. No documentation available. + DW_TAG_PGI_kanji_type = 0xA000, + DW_TAG_PGI_interface_block = 0xA020 +}; + + +enum DwarfHasChild { + DW_children_no = 0, + DW_children_yes = 1 +}; + +// Form names and codes. +enum DwarfForm { + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + // DWARF 4. + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_ref_sig8 = 0x20, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. 
+ DW_FORM_GNU_addr_index = 0x1f01, + DW_FORM_GNU_str_index = 0x1f02 +}; + +// Attribute names and codes +enum DwarfAttribute { + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_stride_size = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_items = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + // DWARF 3 values. 
+ DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + DW_AT_small = 0x5d, + DW_AT_descimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + DW_AT_lo_user = 0x2000, + DW_AT_hi_user = 0x3fff, + // DWARF 4 values. + DW_AT_signature = 0x69, + DW_AT_main_subprogram = 0x6a, + DW_AT_data_bit_offset = 0x6b, + DW_AT_const_expr = 0x6c, + DW_AT_enum_class = 0x6d, + DW_AT_linkage_name = 0x6e, + // SGI/MIPS extensions. + DW_AT_MIPS_fde = 0x2001, + DW_AT_MIPS_loop_begin = 0x2002, + DW_AT_MIPS_tail_loop_begin = 0x2003, + DW_AT_MIPS_epilog_begin = 0x2004, + DW_AT_MIPS_loop_unroll_factor = 0x2005, + DW_AT_MIPS_software_pipeline_depth = 0x2006, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_MIPS_stride = 0x2008, + DW_AT_MIPS_abstract_name = 0x2009, + DW_AT_MIPS_clone_origin = 0x200a, + DW_AT_MIPS_has_inlines = 0x200b, + // HP extensions. + DW_AT_HP_block_index = 0x2000, + DW_AT_HP_unmodifiable = 0x2001, // Same as DW_AT_MIPS_fde. + DW_AT_HP_actuals_stmt_list = 0x2010, + DW_AT_HP_proc_per_section = 0x2011, + DW_AT_HP_raw_data_ptr = 0x2012, + DW_AT_HP_pass_by_reference = 0x2013, + DW_AT_HP_opt_level = 0x2014, + DW_AT_HP_prof_version_id = 0x2015, + DW_AT_HP_opt_flags = 0x2016, + DW_AT_HP_cold_region_low_pc = 0x2017, + DW_AT_HP_cold_region_high_pc = 0x2018, + DW_AT_HP_all_variables_modifiable = 0x2019, + DW_AT_HP_linkage_name = 0x201a, + DW_AT_HP_prof_flags = 0x201b, // In comp unit of procs_info for -g. 
+ // GNU extensions. + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + // http://gcc.gnu.org/wiki/ThreadSafetyAnnotation + DW_AT_GNU_guarded_by = 0x2108, + DW_AT_GNU_pt_guarded_by = 0x2109, + DW_AT_GNU_guarded = 0x210a, + DW_AT_GNU_pt_guarded = 0x210b, + DW_AT_GNU_locks_excluded = 0x210c, + DW_AT_GNU_exclusive_locks_required = 0x210d, + DW_AT_GNU_shared_locks_required = 0x210e, + // http://gcc.gnu.org/wiki/DwarfSeparateTypeInfo + DW_AT_GNU_odr_signature = 0x210f, + // http://gcc.gnu.org/wiki/TemplateParmsDwarf + DW_AT_GNU_template_name = 0x2110, + // http://www.dwarfstd.org/ShowIssue.php?issue=100909.2&type=open + DW_AT_GNU_call_site_value = 0x2111, + DW_AT_GNU_call_site_data_value = 0x2112, + DW_AT_GNU_call_site_target = 0x2113, + DW_AT_GNU_call_site_target_clobbered = 0x2114, + DW_AT_GNU_tail_call = 0x2115, + DW_AT_GNU_all_tail_call_sites = 0x2116, + DW_AT_GNU_all_call_sites = 0x2117, + DW_AT_GNU_all_source_call_sites = 0x2118, + DW_AT_GNU_macros = 0x2119, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_AT_GNU_dwo_name = 0x2130, + DW_AT_GNU_dwo_id = 0x2131, + DW_AT_GNU_ranges_base = 0x2132, + DW_AT_GNU_addr_base = 0x2133, + DW_AT_GNU_pubnames = 0x2134, + DW_AT_GNU_pubtypes = 0x2135, + // discriminator. + DW_AT_GNU_discriminator = 0x2136, + // VMS extensions. + DW_AT_VMS_rtnbeg_pd_address = 0x2201, + // UPC extension. + DW_AT_upc_threads_scaled = 0x3210, + // PGI (STMicroelectronics) extensions. + DW_AT_PGI_lbase = 0x3a00, + DW_AT_PGI_soffset = 0x3a01, + DW_AT_PGI_lstride = 0x3a02, + // Apple extensions. 
+ DW_AT_APPLE_optimized = 0x3fe1, + DW_AT_APPLE_flags = 0x3fe2, + DW_AT_APPLE_isa = 0x3fe3, + DW_AT_APPLE_block = 0x3fe4, + DW_AT_APPLE_major_runtime_vers = 0x3fe5, + DW_AT_APPLE_runtime_class = 0x3fe6, + DW_AT_APPLE_omit_frame_ptr = 0x3fe7, + DW_AT_APPLE_property_name = 0x3fe8, + DW_AT_APPLE_property_getter = 0x3fe9, + DW_AT_APPLE_property_setter = 0x3fea, + DW_AT_APPLE_property_attribute = 0x3feb, + DW_AT_APPLE_objc_complete_type = 0x3fec +}; + + +// Line number opcodes. +enum DwarfLineNumberOps { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3, + DW_LNS_set_file = 4, + DW_LNS_set_column = 5, + DW_LNS_negate_stmt = 6, + DW_LNS_set_basic_block = 7, + DW_LNS_const_add_pc = 8, + DW_LNS_fixed_advance_pc = 9, + // DWARF 3. + DW_LNS_set_prologue_end = 10, + DW_LNS_set_epilogue_begin = 11, + DW_LNS_set_isa = 12 +}; + +// Line number extended opcodes. +enum DwarfLineNumberExtendedOps { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2, + DW_LNE_define_file = 3, + DW_LNE_set_discriminator = 4, + DW_LNE_lo_user = 0x80, + DW_LNE_hi_user = 0xff, + // HP extensions. 
+ DW_LNE_HP_negate_is_UV_update = 0x11, + DW_LNE_HP_push_context = 0x12, + DW_LNE_HP_pop_context = 0x13, + DW_LNE_HP_set_file_line_column = 0x14, + DW_LNE_HP_set_routine_name = 0x15, + DW_LNE_HP_set_sequence = 0x16, + DW_LNE_HP_negate_post_semantics = 0x17, + DW_LNE_HP_negate_function_exit = 0x18, + DW_LNE_HP_negate_front_end_logical = 0x19, + DW_LNE_HP_define_proc = 0x20 +}; + +// Type encoding names and codes +enum DwarfEncoding { + DW_ATE_address =0x1, + DW_ATE_boolean =0x2, + DW_ATE_complex_float =0x3, + DW_ATE_float =0x4, + DW_ATE_signed =0x5, + DW_ATE_signed_char =0x6, + DW_ATE_unsigned =0x7, + DW_ATE_unsigned_char =0x8, + // DWARF3/DWARF3f + DW_ATE_imaginary_float =0x9, + DW_ATE_packed_decimal =0xa, + DW_ATE_numeric_string =0xb, + DW_ATE_edited =0xc, + DW_ATE_signed_fixed =0xd, + DW_ATE_unsigned_fixed =0xe, + DW_ATE_decimal_float =0xf, + // DWARF4 + DW_ATR_UTF =0x10, + DW_ATE_lo_user =0x80, + DW_ATE_hi_user =0xff +}; + +// Location virtual machine opcodes +enum DwarfOpcode { + DW_OP_addr =0x03, + DW_OP_deref =0x06, + DW_OP_const1u =0x08, + DW_OP_const1s =0x09, + DW_OP_const2u =0x0a, + DW_OP_const2s =0x0b, + DW_OP_const4u =0x0c, + DW_OP_const4s =0x0d, + DW_OP_const8u =0x0e, + DW_OP_const8s =0x0f, + DW_OP_constu =0x10, + DW_OP_consts =0x11, + DW_OP_dup =0x12, + DW_OP_drop =0x13, + DW_OP_over =0x14, + DW_OP_pick =0x15, + DW_OP_swap =0x16, + DW_OP_rot =0x17, + DW_OP_xderef =0x18, + DW_OP_abs =0x19, + DW_OP_and =0x1a, + DW_OP_div =0x1b, + DW_OP_minus =0x1c, + DW_OP_mod =0x1d, + DW_OP_mul =0x1e, + DW_OP_neg =0x1f, + DW_OP_not =0x20, + DW_OP_or =0x21, + DW_OP_plus =0x22, + DW_OP_plus_uconst =0x23, + DW_OP_shl =0x24, + DW_OP_shr =0x25, + DW_OP_shra =0x26, + DW_OP_xor =0x27, + DW_OP_bra =0x28, + DW_OP_eq =0x29, + DW_OP_ge =0x2a, + DW_OP_gt =0x2b, + DW_OP_le =0x2c, + DW_OP_lt =0x2d, + DW_OP_ne =0x2e, + DW_OP_skip =0x2f, + DW_OP_lit0 =0x30, + DW_OP_lit1 =0x31, + DW_OP_lit2 =0x32, + DW_OP_lit3 =0x33, + DW_OP_lit4 =0x34, + DW_OP_lit5 =0x35, + DW_OP_lit6 =0x36, + 
DW_OP_lit7 =0x37, + DW_OP_lit8 =0x38, + DW_OP_lit9 =0x39, + DW_OP_lit10 =0x3a, + DW_OP_lit11 =0x3b, + DW_OP_lit12 =0x3c, + DW_OP_lit13 =0x3d, + DW_OP_lit14 =0x3e, + DW_OP_lit15 =0x3f, + DW_OP_lit16 =0x40, + DW_OP_lit17 =0x41, + DW_OP_lit18 =0x42, + DW_OP_lit19 =0x43, + DW_OP_lit20 =0x44, + DW_OP_lit21 =0x45, + DW_OP_lit22 =0x46, + DW_OP_lit23 =0x47, + DW_OP_lit24 =0x48, + DW_OP_lit25 =0x49, + DW_OP_lit26 =0x4a, + DW_OP_lit27 =0x4b, + DW_OP_lit28 =0x4c, + DW_OP_lit29 =0x4d, + DW_OP_lit30 =0x4e, + DW_OP_lit31 =0x4f, + DW_OP_reg0 =0x50, + DW_OP_reg1 =0x51, + DW_OP_reg2 =0x52, + DW_OP_reg3 =0x53, + DW_OP_reg4 =0x54, + DW_OP_reg5 =0x55, + DW_OP_reg6 =0x56, + DW_OP_reg7 =0x57, + DW_OP_reg8 =0x58, + DW_OP_reg9 =0x59, + DW_OP_reg10 =0x5a, + DW_OP_reg11 =0x5b, + DW_OP_reg12 =0x5c, + DW_OP_reg13 =0x5d, + DW_OP_reg14 =0x5e, + DW_OP_reg15 =0x5f, + DW_OP_reg16 =0x60, + DW_OP_reg17 =0x61, + DW_OP_reg18 =0x62, + DW_OP_reg19 =0x63, + DW_OP_reg20 =0x64, + DW_OP_reg21 =0x65, + DW_OP_reg22 =0x66, + DW_OP_reg23 =0x67, + DW_OP_reg24 =0x68, + DW_OP_reg25 =0x69, + DW_OP_reg26 =0x6a, + DW_OP_reg27 =0x6b, + DW_OP_reg28 =0x6c, + DW_OP_reg29 =0x6d, + DW_OP_reg30 =0x6e, + DW_OP_reg31 =0x6f, + DW_OP_breg0 =0x70, + DW_OP_breg1 =0x71, + DW_OP_breg2 =0x72, + DW_OP_breg3 =0x73, + DW_OP_breg4 =0x74, + DW_OP_breg5 =0x75, + DW_OP_breg6 =0x76, + DW_OP_breg7 =0x77, + DW_OP_breg8 =0x78, + DW_OP_breg9 =0x79, + DW_OP_breg10 =0x7a, + DW_OP_breg11 =0x7b, + DW_OP_breg12 =0x7c, + DW_OP_breg13 =0x7d, + DW_OP_breg14 =0x7e, + DW_OP_breg15 =0x7f, + DW_OP_breg16 =0x80, + DW_OP_breg17 =0x81, + DW_OP_breg18 =0x82, + DW_OP_breg19 =0x83, + DW_OP_breg20 =0x84, + DW_OP_breg21 =0x85, + DW_OP_breg22 =0x86, + DW_OP_breg23 =0x87, + DW_OP_breg24 =0x88, + DW_OP_breg25 =0x89, + DW_OP_breg26 =0x8a, + DW_OP_breg27 =0x8b, + DW_OP_breg28 =0x8c, + DW_OP_breg29 =0x8d, + DW_OP_breg30 =0x8e, + DW_OP_breg31 =0x8f, + DW_OP_regX =0x90, + DW_OP_fbreg =0x91, + DW_OP_bregX =0x92, + DW_OP_piece =0x93, + DW_OP_deref_size =0x94, + 
DW_OP_xderef_size =0x95, + DW_OP_nop =0x96, + // DWARF3/DWARF3f + DW_OP_push_object_address =0x97, + DW_OP_call2 =0x98, + DW_OP_call4 =0x99, + DW_OP_call_ref =0x9a, + DW_OP_form_tls_address =0x9b, + DW_OP_call_frame_cfa =0x9c, + DW_OP_bit_piece =0x9d, + // DWARF4 + DW_OP_implicit_value =0x9e, + DW_OP_stack_value =0x9f, + DW_OP_lo_user =0xe0, + DW_OP_hi_user =0xff, + // GNU extensions + DW_OP_GNU_push_tls_address =0xe0, + DW_OP_GNU_uninit =0xf0, + DW_OP_GNU_encoded_addr =0xf1, + // http://www.dwarfstd.org/ShowIssue.php?issue=100831.1&type=open + DW_OP_GNU_implicit_pointer =0xf2, + // http://www.dwarfstd.org/ShowIssue.php?issue=100909.1&type=open + DW_OP_GNU_entry_value =0xf3, + // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission. + DW_OP_GNU_addr_index =0xfb, + DW_OP_GNU_const_index =0xfc +}; + +} // namespace autofdo +#endif // AUTOFDO_SYMBOLIZE_DWARF2ENUMS_H__ Index: lib/ProfileData/PerfConverter/symbolize/dwarf2reader.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/dwarf2reader.h @@ -0,0 +1,583 @@ +//=-- dwarf2reader.h --------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file contains definitions related to the DWARF2/3 reader and +// it's handler interfaces. +// The DWARF2/3 specification can be found at +// http://dwarf.freestandards.org and should be considered required +// reading if you wish to modify the implementation. 
+// I have only made a cursory attempt to explain terminology that is +// used here, as it is much better explained in the standard documents +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_DWARF2READER_H__ +#define AUTOFDO_SYMBOLIZE_DWARF2READER_H__ + +#include +#include +#include +#include +#include +#include +#include + +#include "llvm_port.h" + +#include "symbolize/dwarf2enums.h" + +namespace autofdo { +class ElfReader; +class ByteReader; +class Dwarf2Handler; +class LineInfoHandler; +class DwpReader; + +// This maps from a string naming a section to a pair containing a +// the data for the section, and the size of the section. +typedef map > SectionMap; +typedef list > AttributeList; +typedef AttributeList::iterator AttributeIterator; +typedef AttributeList::const_iterator ConstAttributeIterator; + +// A vector containing directory names +typedef vector DirectoryVector; +// A vector containing a directory name index and a file name +typedef vector > FileVector; + +struct LineInfoHeader { + uint64 total_length; + uint16 version; + uint64 prologue_length; + uint8 min_insn_length; // insn stands for instruction + uint8 max_ops_per_insn; + bool default_is_stmt; // stmt stands for statement + int8 line_base; + uint8 line_range; + uint8 opcode_base; + // Use a pointer so that signalsafe_addr2line is able to use this structure + // without heap allocation problem. + vector *std_opcode_lengths; +}; + +class LineInfo { + public: + // Initializes a .debug_line reader. Buffer and buffer length point + // to the beginning and length of the line information to read. + // Reader is a ByteReader class that has the endianness set + // properly. 
+ LineInfo(const char* buffer_, uint64 buffer_length, + ByteReader* reader, LineInfoHandler* handler); + + virtual ~LineInfo() { + if (header_.std_opcode_lengths) { + delete header_.std_opcode_lengths; + } + } + + bool malformed() const {return malformed_;} + + // Start processing line info, and calling callbacks in the handler. + // Consumes the line number information for a single compilation unit. + // Returns the number of bytes processed. + uint64 Start(); + + // Process a single line info opcode at START using the state + // machine at LSM. Return true if we should define a line using the + // current state of the line state machine. Place the length of the + // opcode in LEN. + static bool ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader &header, + const char* start, + struct LineStateMachine* lsm, + size_t* len, + uintptr_t pc); + + private: + // Advance lineptr in a buffer. If lineptr will advance beyond the + // end of buffer_, malformed_ is set true. + // + // Arguments: + // incr: how far to increment + // lineptr: pointer to adjust + // + // Returns true if lineptr is advanced. + bool AdvanceLinePtr(int incr, const char **lineptr); + + // Reads the DWARF2/3 header for this line info. + void ReadHeader(); + + // Reads the DWARF2/3 line information + void ReadLines(); + + // The associated handler to call processing functions in + LineInfoHandler* handler_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // A DWARF2/3 line info header. This is not the same size as + // in the actual file, as the one in the file may have a 32 bit or + // 64 bit lengths + + struct LineInfoHeader header_; + + // buffer is the buffer for our line info, starting at exactly where + // the line info to read is. after_header is the place right after + // the end of the line information header. 
+ const char* buffer_; + uint64 buffer_length_; + const char* after_header_; + bool malformed_; + DISALLOW_EVIL_CONSTRUCTORS(LineInfo); +}; + +// This class is the main interface between the line info reader and +// the client. The virtual functions inside this get called for +// interesting events that happen during line info reading. The +// default implementation does nothing + +class LineInfoHandler { + public: + LineInfoHandler() { } + + virtual ~LineInfoHandler() { } + + // Called when we define a directory. NAME is the directory name, + // DIR_NUM is the directory number + virtual void DefineDir(const char *name, uint32 dir_num) { } + + // Called when we define a filename. NAME is the filename, FILE_NUM + // is the file number which is -1 if the file index is the next + // index after the last numbered index (this happens when files are + // dynamically defined by the line program), DIR_NUM is the + // directory index for the directory name of this file, MOD_TIME is + // the modification time of the file, and LENGTH is the length of + // the file + virtual void DefineFile(const char *name, int32 file_num, + uint32 dir_num, uint64 mod_time, + uint64 length) { } + + // Called when the line info reader has a new line, address pair + // ready for us. ADDRESS is the address of the code, FILE_NUM is + // the file number containing the code, LINE_NUM is the line number + // in that file for the code, COLUMN_NUM is the column number the + // code starts at, if we know it (0 otherwise), and DISCRIMINATOR is + // the path discriminator identifying the basic block on the + // specified line. + virtual void AddLine(uint64 address, uint32 file_num, uint32 line_num, + uint32 column_num, uint32 discriminator) { } + + private: + DISALLOW_EVIL_CONSTRUCTORS(LineInfoHandler); +}; + +// This class is the main interface between the reader and the +// client. The virtual functions inside this get called for +// interesting events that happen during DWARF2 reading. 
+// The default implementation skips everything. + +class Dwarf2Handler { + public: + Dwarf2Handler() { } + + virtual ~Dwarf2Handler() { } + + // Start to process a compilation unit at OFFSET from the beginning of the + // debug_info section. Return false if you would like + // to skip this compilation unit. + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version) { return false; } + + // Start to process a DIE at OFFSET from the beginning of the + // debug_info section. Return false if you would like to skip this + // DIE. + virtual bool StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { return false; } + + // Called when we have an attribute with unsigned data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + virtual void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + + // Called when we have an attribute with signed data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + virtual void ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { } + + // Called when we have an attribute with a buffer of data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA, and the + // length of the buffer is LENGTH. The buffer is owned by the + // caller, not the callee, and may not persist for very long. If + // you want the data to be available later, it needs to be copied. 
+  virtual void ProcessAttributeBuffer(uint64 offset,
+                                      enum DwarfAttribute attr,
+                                      enum DwarfForm form,
+                                      const char* data,
+                                      uint64 len) { }
+
+  // Called when we have an attribute with string data to give to
+  // our handler.  The attribute is for the DIE at OFFSET from the
+  // beginning of compilation unit, has a name of ATTR, a form of
+  // FORM, and the actual data of the attribute is in DATA.
+  virtual void ProcessAttributeString(uint64 offset,
+                                      enum DwarfAttribute attr,
+                                      enum DwarfForm form,
+                                      const char* data) { }
+
+  // Called when finished processing the DIE at OFFSET.
+  // Because DWARF2/3 specifies a tree of DIEs, you may get starts
+  // before ends of the previous DIE, as we process children before
+  // ending the parent.
+  virtual void EndDIE(uint64 offset) { }
+
+ private:
+  DISALLOW_EVIL_CONSTRUCTORS(Dwarf2Handler);
+};
+
+// The base of DWARF2/3 debug info is a DIE (Debugging Information
+// Entry).
+// DWARF groups DIEs into a tree and calls the root of this tree a
+// "compilation unit".  Most of the time, there is one compilation
+// unit in the .debug_info section for each file that had debug info
+// generated.
+// Each DIE consists of
+
+// 1. a tag specifying a thing that is being described (i.e.,
+// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc.)
+// 2. attributes (such as DW_AT_location for location in memory,
+// DW_AT_name for name), and data for each attribute.
+// 3. A flag saying whether the DIE has children or not
+
+// In order to gain some amount of compression, the format of
+// each DIE (tag name, attributes and data forms for the attributes)
+// is stored in a separate table called the "abbreviation table".
+// This is done because a large number of DIEs have the exact same tag
+// and list of attributes, but different data for those attributes.
+// As a result, the .debug_info section is just a stream of data, and
+// requires reading of the .debug_abbrev section to say what the data
+// means.
+ +// As a warning to the user, it should be noted that the reason for +// using absolute offsets from the beginning of .debug_info is that +// DWARF2/3 support referencing DIE's from other DIE's by their offset +// from either the current compilation unit start, *or* the beginning +// of the .debug_info section. This means it is possible to reference +// a DIE in one compilation unit from a DIE in another compilation +// unit. This style of reference is usually used to eliminate +// duplicated information that occurs across compilation +// units, such as base types, etc. GCC 3.4+ support this with +// -feliminate-dwarf2-dups. Other toolchains will sometimes do +// duplicate elimination in the linker. + +class CompilationUnit { + public: + // Initialize a compilation unit. This requires a map of sections, + // the offset of this compilation unit in the debug_info section, a + // ByteReader, and a Dwarf2Handler class to call callbacks in. + CompilationUnit(const string& path, const SectionMap& sections, + uint64 offset, ByteReader* reader, Dwarf2Handler* handler); + + virtual ~CompilationUnit(); + + // Initialize a compilation unit from a .dwo or .dwp file. + // In this case, we need the .debug_addr section from the + // executable file that contains the corresponding skeleton + // compilation unit. We also inherit the Dwarf2Handler from + // the executable file, and call it as if we were still + // processing the original compilation unit. + void SetSplitDwarf(const char* addr_buffer, uint64 addr_buffer_length, + uint64 addr_base, uint64 ranges_base); + + bool malformed() const {return malformed_;} + + // Begin reading a Dwarf2 compilation unit, and calling the + // callbacks in the Dwarf2Handler + // Return the offset of the end of the compilation unit - the passed + // in offset. 
+ uint64 Start(); + + private: + // This struct represents a single DWARF2/3 abbreviation + // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a + // tag and a list of attributes, as well as the data form of each attribute. + struct Abbrev { + uint32 number; + enum DwarfTag tag; + bool has_children; + AttributeList attributes; + }; + + // A DWARF2/3 compilation unit header. This is not the same size as + // in the actual file, as the one in the file may have a 32 bit or + // 64 bit length. + struct CompilationUnitHeader { + uint64 length; + uint16 version; + uint64 abbrev_offset; + uint8 address_size; + } header_; + + // Reads the DWARF2/3 header for this compilation unit. + void ReadHeader(); + + // Reads the DWARF2/3 abbreviations for this compilation unit + void ReadAbbrevs(); + + // Processes a single DIE for this compilation unit. + // + // Returns a new pointer just past the end of it, or NULL if a + // malformed DIE is encountered. + const char* ProcessDIE(uint64 dieoffset, + const char* start, + const Abbrev& abbrev); + + // Processes a single attribute. + // + // Returns a new pointer just past the end of it, or NULL if malformed. + const char* ProcessAttribute(uint64 dieoffset, + const char* start, + enum DwarfAttribute attr, + enum DwarfForm form); + + // Called when we have an attribute with unsigned data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + // If we see a DW_AT_GNU_dwo_id attribute, save the value so that + // we can find the debug info in a .dwo or .dwp file. 
+ void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + if (attr == DW_AT_GNU_dwo_id) + dwo_id_ = data; + else if (attr == DW_AT_GNU_addr_base) + addr_base_ = data; + else if (attr == DW_AT_GNU_ranges_base) + ranges_base_ = data; + else if (attr == DW_AT_ranges) + data += ranges_base_; + handler_->ProcessAttributeUnsigned(offset, attr, form, data); + } + + // Called when we have an attribute with signed data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + void ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { + handler_->ProcessAttributeSigned(offset, attr, form, data); + } + + // Called when we have an attribute with a buffer of data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA, and the + // length of the buffer is LENGTH. + void ProcessAttributeBuffer(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data, + uint64 len) { + handler_->ProcessAttributeBuffer(offset, attr, form, data, len); + } + + // Called when we have an attribute with string data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + // If we see a DW_AT_GNU_dwo_name attribute, save the value so + // that we can find the debug info in a .dwo or .dwp file. 
+ void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data) { + if (attr == DW_AT_GNU_dwo_name) + dwo_name_ = data; + handler_->ProcessAttributeString(offset, attr, form, data); + } + + // Processes all DIEs for this compilation unit + void ProcessDIEs(); + + // Skips the die with attributes specified in ABBREV starting at + // START, and return the new place to position the stream to. + const char* SkipDIE(const char* start, const Abbrev& abbrev); + + // Skips the attribute starting at START, with FORM, and return the + // new place to position the stream to. + const char* SkipAttribute(const char* start, enum DwarfForm form); + + // Process the actual debug information in a split DWARF file. + void ProcessSplitDwarf(); + + // Read the debug sections from a .dwo file. + void ReadDebugSectionsFromDwo(ElfReader* elf_reader, + SectionMap* sections); + + // Path of the file containing the debug information. + const string path_; + + // Offset from section start is the offset of this compilation unit + // from the beginning of the .debug_info section. + uint64 offset_from_section_start_; + + // buffer is the buffer for our CU, starting at .debug_info + offset + // passed in from constructor. + // after_header points to right after the compilation unit header. + const char* buffer_; + uint64 buffer_length_; + const char* after_header_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // The map of sections in our file to buffers containing their data + const SectionMap& sections_; + + // The associated handler to call processing functions in + Dwarf2Handler* handler_; + + // Set of DWARF2/3 abbreviations for this compilation unit. Indexed + // by abbreviation number, which means that abbrevs_[0] is not + // valid. + vector* abbrevs_; + + // String section buffer and length, if we have a string section. 
+ // This is here to avoid doing a section lookup for strings in + // ProcessAttribute, which is in the hot path for DWARF2 reading. + const char* string_buffer_; + uint64 string_buffer_length_; + + // String offsets section buffer and length, if we have a string offsets + // section (.debug_str_offsets or .debug_str_offsets.dwo). + const char* str_offsets_buffer_; + uint64 str_offsets_buffer_length_; + + // Address section buffer and length, if we have an address section + // (.debug_addr). + const char* addr_buffer_; + uint64 addr_buffer_length_; + + // Flag indicating whether this compilation unit is part of a .dwo + // or .dwp file. If true, we are reading this unit because a + // skeleton compilation unit in an executable file had a + // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. + // In a .dwo file, we expect the string offsets section to + // have a ".dwo" suffix, and we will use the ".debug_addr" section + // associated with the skeleton compilation unit. + bool is_split_dwarf_; + + // The value of the DW_AT_GNU_dwo_id attribute, if any. + uint64 dwo_id_; + + // The value of the DW_AT_GNU_dwo_name attribute, if any. + const char* dwo_name_; + + // The value of the DW_AT_GNU_ranges_base attribute, if any. + uint64 ranges_base_; + + // The value of the DW_AT_GNU_addr_base attribute, if any. + uint64 addr_base_; + + // True if we have already looked for a .dwp file. + bool have_checked_for_dwp_; + + // Path to the .dwp file. + string dwp_path_; + + // ByteReader for the DWP file. + ByteReader* dwp_byte_reader_; + + // DWP reader. + DwpReader* dwp_reader_; + + bool malformed_; + DISALLOW_EVIL_CONSTRUCTORS(CompilationUnit); +}; + +// A Reader for a .dwp file. Supports the fetching of DWARF debug +// info for a given dwo_id. +// +// There are two versions of .dwp files. In both versions, the +// .dwp file is an ELF file containing only debug sections. 
+// In Version 1, the file contains many copies of each debug +// section, one for each .dwo file that is packaged in the .dwp +// file, and the .debug_cu_index section maps from the dwo_id +// to a set of section indexes. In Version 2, the file contains +// one of each debug section, and the .debug_cu_index section +// maps from the dwo_id to a set of offsets and lengths that +// identify each .dwo file's contribution to the larger sections. + +class DwpReader { + public: + DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); + + ~DwpReader(); + + // Read the CU index and initialize data members. + void Initialize(); + + // Read the debug sections for the given dwo_id. + void ReadDebugSectionsForCU(uint64 dwo_id, SectionMap* sections); + + private: + // Search the hash table for "dwo_id". Returns the slot index + // where the dwo_id was found, or -1 if it was not found. + int LookupCU(uint64 dwo_id); + + // The ELF reader for the .dwp file. + ElfReader* elf_reader_; + + // The ByteReader for the .dwp file. + const ByteReader& byte_reader_; + + // Pointer to the .debug_cu_index section. + const char* cu_index_; + + // Size of the .debug_cu_index section. + size_t cu_index_size_; + + // Pointer to the .debug_str.dwo section. + const char* string_buffer_; + + // Size of the .debug_str.dwo section. + size_t string_buffer_size_; + + // Version of the .dwp file. We support version 1 currently. + int version_; + + // Number of slots in the hash table. + unsigned int nslots_; + + // Pointer to the beginning of the hash table. + const char* phash_; + + // Pointer to the beginning of the index table. + const char* pindex_; + + // Pointer to the beginning of the section index pool. 
+ const char* shndx_pool_; +}; + +} // namespace autofdo + +#endif // AUTOFDO_SYMBOLIZE_DWARF2READER_H__ Index: lib/ProfileData/PerfConverter/symbolize/dwarf2reader.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/dwarf2reader.cc @@ -0,0 +1,1178 @@ +//=-- dwarf2reader.cc -------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "symbolize/dwarf2reader.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "symbolize/bytereader.h" +#include "symbolize/bytereader-inl.h" +#include "symbolize/elf_reader.h" +#include "symbolize/line_state_machine.h" + +namespace autofdo { + +// Read a DWARF2/3 initial length field from START, using READER, and +// report the length in LEN. Return the actual initial length. + +static uint64 ReadInitialLength(const char* start, + ByteReader* reader, size_t* len) { + const uint64 initial_length = reader->ReadFourBytes(start); + start += 4; + + // In DWARF2/3, if the initial length is all 1 bits, then the offset + // size is 8 and we need to read the next 8 bytes for the real length. 
+ if (initial_length == 0xffffffff) { + reader->SetOffsetSize(8); + *len = 12; + return reader->ReadOffset(start); + } else { + reader->SetOffsetSize(4); + *len = 4; + } + return initial_length; +} + +CompilationUnit::CompilationUnit(const string& path, + const SectionMap& sections, uint64 offset, + ByteReader* reader, Dwarf2Handler* handler) + : path_(path), offset_from_section_start_(offset), reader_(reader), + sections_(sections), handler_(handler), abbrevs_(NULL), + string_buffer_(NULL), string_buffer_length_(0), + str_offsets_buffer_(NULL), str_offsets_buffer_length_(0), + addr_buffer_(NULL), addr_buffer_length_(0), + is_split_dwarf_(false), dwo_id_(0), dwo_name_(), ranges_base_(0), + addr_base_(0), have_checked_for_dwp_(false), dwp_path_(), + dwp_byte_reader_(NULL), dwp_reader_(NULL), malformed_(false) {} + +CompilationUnit::~CompilationUnit() { + if (abbrevs_) delete abbrevs_; + if (dwp_reader_) delete dwp_reader_; + if (dwp_byte_reader_) delete dwp_byte_reader_; +} + +// Initialize a compilation unit from a .dwo or .dwp file. +// In this case, we need the .debug_addr section from the +// executable file that contains the corresponding skeleton +// compilation unit. We also inherit the Dwarf2Handler from +// the executable file, and call it as if we were still +// processing the original compilation unit. + +void CompilationUnit::SetSplitDwarf(const char* addr_buffer, + uint64 addr_buffer_length, + uint64 addr_base, + uint64 ranges_base) { + is_split_dwarf_ = true; + addr_buffer_ = addr_buffer; + addr_buffer_length_ = addr_buffer_length; + addr_base_ = addr_base; + ranges_base_ = ranges_base; +} + +// Read a DWARF2/3 abbreviation section. +// Each abbrev consists of a abbreviation number, a tag, a byte +// specifying whether the tag has children, and a list of +// attribute/form pairs. +// The list of forms is terminated by a 0 for the attribute, and a +// zero for the form. The entire abbreviation section is terminated +// by a zero for the code. 
+ +void CompilationUnit::ReadAbbrevs() { + if (abbrevs_) + return; + + // First get the debug_abbrev section + SectionMap::const_iterator iter = sections_.find(".debug_abbrev"); + CHECK(iter != sections_.end()); + + abbrevs_ = new vector; + abbrevs_->resize(1); + + // The only way to CHECK whether we are reading over the end of the + // buffer would be to first compute the size of the leb128 data by + // reading it, then go back and read it again. + const char* abbrev_start = iter->second.first + + header_.abbrev_offset; + const char* abbrevptr = abbrev_start; + const uint64 abbrev_length = iter->second.second - header_.abbrev_offset; + + while (1) { + CompilationUnit::Abbrev abbrev; + size_t len; + const uint32 number = reader_->ReadUnsignedLEB128(abbrevptr, &len); + + if (number == 0) + break; + abbrev.number = number; + abbrevptr += len; + + DCHECK(abbrevptr < abbrev_start + abbrev_length); + const uint32 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + abbrev.tag = static_cast(tag); + + DCHECK(abbrevptr < abbrev_start + abbrev_length); + abbrev.has_children = reader_->ReadOneByte(abbrevptr); + abbrevptr += 1; + + DCHECK(abbrevptr < abbrev_start + abbrev_length); + + while (1) { + const uint32 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + + DCHECK(abbrevptr < abbrev_start + abbrev_length); + const uint32 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + if (nametemp == 0 && formtemp == 0) + break; + + const enum DwarfAttribute name = + static_cast(nametemp); + const enum DwarfForm form = static_cast(formtemp); + abbrev.attributes.push_back(make_pair(name, form)); + } + CHECK(abbrev.number == abbrevs_->size()); + abbrevs_->push_back(abbrev); + } +} + +// Skips a single DIE's attributes. 
+const char* CompilationUnit::SkipDIE(const char* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = SkipAttribute(start, i->second); + } + return start; +} + +// Skips a single attribute form's data. +const char* CompilationUnit::SkipAttribute(const char* start, + enum DwarfForm form) { + size_t len; + + switch (form) { + case DW_FORM_indirect: + form = static_cast(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return SkipAttribute(start, form); + break; + + case DW_FORM_flag_present: + return start; + break; + + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + return start + 1; + break; + case DW_FORM_ref2: + case DW_FORM_data2: + return start + 2; + break; + case DW_FORM_ref4: + case DW_FORM_data4: + return start + 4; + break; + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + case DW_FORM_data8: + return start + 8; + break; + case DW_FORM_string: + return start + strlen(start) + 1; + break; + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_addr_index: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + break; + + case DW_FORM_sdata: + reader_->ReadSignedLEB128(start, &len); + return start + len; + break; + case DW_FORM_addr: + return start + reader_->AddressSize(); + break; + case DW_FORM_ref_addr: + // DWARF2 and 3 differ on whether ref_addr is address size or + // offset size. 
+ if (header_.version == 2) { + return start + reader_->AddressSize(); + } else { + return start + reader_->OffsetSize(); + } + break; + + case DW_FORM_block1: + return start + 1 + reader_->ReadOneByte(start); + break; + case DW_FORM_block2: + return start + 2 + reader_->ReadTwoBytes(start); + break; + case DW_FORM_block4: + return start + 4 + reader_->ReadFourBytes(start); + break; + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64 size = reader_->ReadUnsignedLEB128(start, &len); + return start + size + len; + } + break; + case DW_FORM_strp: + case DW_FORM_sec_offset: + return start + reader_->OffsetSize(); + break; + default: + LOG(FATAL) << "Unhandled form type"; + } + LOG(FATAL) << "Unhandled form type"; + return NULL; +} + +// Read a DWARF2/3 header. +// The header is variable length in DWARF3 (and DWARF2 as extended by +// most compilers), and consists of an length field, a version number, +// the offset in the .debug_abbrev section for our abbrevs, and an +// address size. +void CompilationUnit::ReadHeader() { + const char* headerptr = buffer_; + size_t initial_length_size; + + if (headerptr + 4 >= buffer_ + buffer_length_) { + malformed_ = true; + return; + } + const uint64 initial_length = ReadInitialLength(headerptr, reader_, + &initial_length_size); + headerptr += initial_length_size; + header_.length = initial_length; + if (header_.length == 0 + || headerptr + 2 >= buffer_ + buffer_length_) { + malformed_ = true; + return; + } + + header_.version = reader_->ReadTwoBytes(headerptr); + if (header_.version < 2 || header_.version > 4) { + malformed_ = true; + return; + } + headerptr += 2; + + if (headerptr + reader_->OffsetSize() >= buffer_ + buffer_length_) { + malformed_ = true; + return; + } + header_.abbrev_offset = reader_->ReadOffset(headerptr); + headerptr += reader_->OffsetSize(); + + if (headerptr + 1 >= buffer_ + buffer_length_) { + malformed_ = true; + return; + } + header_.address_size = reader_->ReadOneByte(headerptr); + if 
(header_.address_size != 4 && header_.address_size != 8) { + malformed_ = true; + return; + } + reader_->SetAddressSize(header_.address_size); + headerptr += 1; + + after_header_ = headerptr; + + // This check ensures that we don't have to do checking during the + // reading of DIEs. header_.length does not include the size of the + // initial length. + if (buffer_ + initial_length_size + header_.length > + buffer_ + buffer_length_) { + malformed_ = true; + return; + } +} + +uint64 CompilationUnit::Start() { + // First get the debug_info section + SectionMap::const_iterator iter = sections_.find(".debug_info"); + CHECK(iter != sections_.end()); + + // Set up our buffer + buffer_ = iter->second.first + offset_from_section_start_; + buffer_length_ = iter->second.second - offset_from_section_start_; + + // Read the header + ReadHeader(); + + // If the header is malformed, the data may be uninitialized and we + // don't know how to proceed in this section, so return the size of + // the section so the loop will stop. + if (malformed()) { + return iter->second.second;; + } + + // Figure out the real length from the end of the initial length to + // the end of the compilation unit, since that is the value we + // return. + uint64 ourlength = header_.length; + if (reader_->OffsetSize() == 8) + ourlength += 12; + else + ourlength += 4; + + // If the user does not want it, just return. + if (!handler_->StartCompilationUnit(offset_from_section_start_, + reader_->AddressSize(), + reader_->OffsetSize(), + header_.length, + header_.version)) + return ourlength; + + // Otherwise, continue by reading our abbreviation entries. + ReadAbbrevs(); + + // Set the string section if we have one. + iter = sections_.find(".debug_str"); + if (iter != sections_.end()) { + string_buffer_ = iter->second.first; + string_buffer_length_ = iter->second.second; + } + + // Set the string offsets section if we have one. 
+ iter = sections_.find(".debug_str_offsets"); + if (iter != sections_.end()) { + str_offsets_buffer_ = iter->second.first; + str_offsets_buffer_length_ = iter->second.second; + } + + // Set the address section if we have one. + iter = sections_.find(".debug_addr"); + if (iter != sections_.end()) { + addr_buffer_ = iter->second.first; + addr_buffer_length_ = iter->second.second; + } + + // Now that we have our abbreviations, start processing DIE's. + ProcessDIEs(); + + return ourlength; +} + +// If one really wanted, you could merge SkipAttribute and +// ProcessAttribute +// This is all boring data manipulation and calling of the handler. +const char* CompilationUnit::ProcessAttribute( + uint64 dieoffset, const char* start, enum DwarfAttribute attr, + enum DwarfForm form) { + size_t len; + + switch (form) { + // DW_FORM_indirect is never used because it is such a space + // waster. + case DW_FORM_indirect: + form = static_cast(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return ProcessAttribute(dieoffset, start, attr, form); + break; + + case DW_FORM_flag_present: + ProcessAttributeUnsigned(dieoffset, attr, form, 1); + return start; + break; + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOneByte(start)); + return start + 1; + break; + case DW_FORM_ref2: + case DW_FORM_data2: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadTwoBytes(start)); + return start + 2; + break; + case DW_FORM_ref4: + case DW_FORM_data4: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadFourBytes(start)); + return start + 4; + break; + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + case DW_FORM_data8: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + break; + case DW_FORM_string: { + const char* str = start; + ProcessAttributeString(dieoffset, attr, form, + str); + return start + strlen(str) + 1; + break; + 
} + case DW_FORM_udata: + case DW_FORM_ref_udata: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, + &len)); + return start + len; + break; + + case DW_FORM_sdata: + ProcessAttributeSigned(dieoffset, attr, form, + reader_->ReadSignedLEB128(start, &len)); + return start + len; + break; + case DW_FORM_addr: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + break; + case DW_FORM_ref_addr: + // DWARF2 and 3 differ on whether ref_addr is address size or + // offset size. + if (header_.version == 2) { + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + } else { + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + } + break; + case DW_FORM_sec_offset: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + break; + + case DW_FORM_block1: { + uint64 datalen = reader_->ReadOneByte(start); + ProcessAttributeBuffer(dieoffset, attr, form, start + 1, + datalen); + return start + 1 + datalen; + break; + } + case DW_FORM_block2: { + uint64 datalen = reader_->ReadTwoBytes(start); + ProcessAttributeBuffer(dieoffset, attr, form, start + 2, + datalen); + return start + 2 + datalen; + break; + } + case DW_FORM_block4: { + uint64 datalen = reader_->ReadFourBytes(start); + ProcessAttributeBuffer(dieoffset, attr, form, start + 4, + datalen); + return start + 4 + datalen; + break; + } + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64 datalen = reader_->ReadUnsignedLEB128(start, &len); + ProcessAttributeBuffer(dieoffset, attr, form, start + len, + datalen); + return start + datalen + len; + break; + } + case DW_FORM_strp: { + CHECK(string_buffer_ != NULL); + + const uint64 offset = reader_->ReadOffset(start); + if (offset >= string_buffer_length_) { + LOG(WARNING) 
<< "offset is out of range. offset=" << offset + << " string_buffer_length_=" << string_buffer_length_; + return NULL; + } + + const char* str = string_buffer_ + offset; + ProcessAttributeString(dieoffset, attr, form, + str); + return start + reader_->OffsetSize(); + break; + } + case DW_FORM_GNU_str_index: { + CHECK(string_buffer_ != NULL); + CHECK(str_offsets_buffer_ != NULL); + + uint64 str_index = reader_->ReadUnsignedLEB128(start, &len); + const char* offset_ptr = + str_offsets_buffer_ + str_index * reader_->OffsetSize(); + const uint64 offset = reader_->ReadOffset(offset_ptr); + if (offset >= string_buffer_length_) { + LOG(WARNING) << "offset is out of range. offset=" << offset + << " string_buffer_length_=" << string_buffer_length_; + return NULL; + } + + const char* str = string_buffer_ + offset; + ProcessAttributeString(dieoffset, attr, form, + str); + return start + len; + break; + } + case DW_FORM_GNU_addr_index: { + CHECK(addr_buffer_ != NULL); + uint64 addr_index = reader_->ReadUnsignedLEB128(start, &len); + const char* addr_ptr = + addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(addr_ptr)); + return start + len; + break; + } + default: + LOG(FATAL) << "Unhandled form type"; + } + LOG(FATAL) << "Unhandled form type"; + return NULL; +} + +const char* CompilationUnit::ProcessDIE(uint64 dieoffset, + const char* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = ProcessAttribute(dieoffset, start, i->first, i->second); + if (start == NULL) { + break; + } + } + + // If this is a skeleton compilation unit generated with split DWARF, + // we need to find the full compilation unit in a .dwo or .dwp file. 
+  if (abbrev.tag == DW_TAG_compile_unit
+      && !is_split_dwarf_
+      && dwo_name_ != NULL)
+    ProcessSplitDwarf();
+
+  return start;
+}
+
+// Walks every DIE in this compilation unit and dispatches each one to
+// handler_.  Uses an explicit stack (rather than recursion) to track DIE
+// nesting, so EndDIE can be reported when a children list terminates.
+void CompilationUnit::ProcessDIEs() {
+  const char* dieptr = after_header_;
+  size_t len;
+
+  // lengthstart is the place the length field is based on.
+  // It is the point in the header after the initial length field
+  const char* lengthstart = buffer_;
+
+  // In 64 bit dwarf, the initial length is 12 bytes, because of the
+  // 0xffffffff at the start.
+  if (reader_->OffsetSize() == 8)
+    lengthstart += 12;
+  else
+    lengthstart += 4;
+
+  // NOTE(review): the template argument appears to have been dropped in this
+  // patch transcription (presumably stack<uint64>) -- verify against the
+  // upstream autofdo source.
+  stack die_stack;
+
+  while (dieptr < (lengthstart + header_.length)) {
+    // We give the user the absolute offset from the beginning of
+    // debug_info, since they need it to deal with ref_addr forms.
+    uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
+
+    uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
+
+    dieptr += len;
+
+    // Abbrev == 0 represents the end of a list of children, or padding between
+    // sections.
+    if (abbrev_num == 0) {
+      if (!die_stack.empty()) {
+        const uint64 offset = die_stack.top();
+        die_stack.pop();
+        handler_->EndDIE(offset);
+      }
+      continue;
+    }
+
+    const Abbrev& abbrev = abbrevs_->at(abbrev_num);
+    const enum DwarfTag tag = abbrev.tag;
+    if (!handler_->StartDIE(absolute_offset, tag, abbrev.attributes)) {
+      // Handler declined this DIE: skip over its attribute data.
+      dieptr = SkipDIE(dieptr, abbrev);
+    } else {
+      dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
+      if (dieptr == NULL) {
+        // Malformed attribute data; abandon the walk.
+        break;
+      }
+    }
+
+    if (abbrev.has_children) {
+      die_stack.push(absolute_offset);
+    } else {
+      handler_->EndDIE(absolute_offset);
+    }
+  }
+}
+
+// Check for a valid ELF file and return the Address size.
+// Returns 0 if not a valid ELF file.
+ +inline int GetElfWidth(const ElfReader& elf) { + if (elf.IsElf32File()) + return 4; + if (elf.IsElf64File()) + return 8; + return 0; +} + +void CompilationUnit::ProcessSplitDwarf() { + struct stat statbuf; + + if (!have_checked_for_dwp_) { + // Look for a .dwp file in the same directory as the executable. + have_checked_for_dwp_ = true; + dwp_path_ = path_ + ".dwp"; + if (stat(dwp_path_.c_str(), &statbuf) == 0) { + ElfReader* elf = new ElfReader(dwp_path_); + int width = GetElfWidth(*elf); + if (width != 0) { + dwp_byte_reader_ = new ByteReader(ENDIANNESS_NATIVE); + dwp_byte_reader_->SetAddressSize(width); + dwp_reader_ = new DwpReader(*dwp_byte_reader_, elf); + dwp_reader_->Initialize(); + } else { + LOG(WARNING) << "File '" << dwp_path_ << "' is not an ELF file."; + delete elf; + } + } + } + if (dwp_reader_ != NULL) { + // If we have a .dwp file, read the debug sections for the requested CU. + SectionMap sections; + dwp_reader_->ReadDebugSectionsForCU(dwo_id_, §ions); + CompilationUnit dwp_comp_unit(dwp_path_, sections, 0, dwp_byte_reader_, + handler_); + dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_, + ranges_base_); + dwp_comp_unit.Start(); + if (dwp_comp_unit.malformed()) + LOG(WARNING) << "File '" << dwp_path_ << "' has mangled " + << ".debug_info.dwo section."; + } else { + // If no .dwp file, try to open the .dwo file. 
+ if (stat(dwo_name_, &statbuf) == 0) { + ElfReader elf(dwo_name_); + int width = GetElfWidth(elf); + if (width != 0) { + ByteReader reader(ENDIANNESS_NATIVE); + reader.SetAddressSize(width); + SectionMap sections; + ReadDebugSectionsFromDwo(&elf, §ions); + CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader, + handler_); + dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, + addr_base_, ranges_base_); + dwo_comp_unit.Start(); + if (dwo_comp_unit.malformed()) + LOG(WARNING) << "File '" << dwo_name_ << "' has mangled " + << ".debug_info.dwo section."; + } else { + LOG(WARNING) << "File '" << dwo_name_ << "' is not an ELF file."; + } + } else { + LOG(WARNING) << "Cannot open file '" << dwo_name_ << "'."; + } + } +} + +void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader, + SectionMap* sections) { + static const char* section_names[] = { + ".debug_abbrev", + ".debug_info", + ".debug_str_offsets", + ".debug_str" + }; + for (unsigned i = 0; i < arraysize(section_names); ++i) { + string base_name = section_names[i]; + string dwo_name = base_name + ".dwo"; + size_t section_size; + const char* section_data = elf_reader->GetSectionByName(dwo_name, + §ion_size); + if (section_data != NULL) + sections->insert( + make_pair(base_name, make_pair(section_data, section_size))); + } +} + +DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader) + : elf_reader_(elf_reader), byte_reader_(byte_reader), + cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL), + string_buffer_size_(0), version_(0), nslots_(0), + phash_(NULL), pindex_(NULL), shndx_pool_(NULL) {} + +DwpReader::~DwpReader() { + if (elf_reader_) delete elf_reader_; +} + +void DwpReader::Initialize() { + cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index", + &cu_index_size_); + if (cu_index_ == NULL) + return; + + // The .debug_str.dwo section is shared by all CUs in the file. 
+ string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo", + &string_buffer_size_); + + version_ = byte_reader_.ReadFourBytes(cu_index_); + + if (version_ == 1) { + nslots_ = byte_reader_.ReadFourBytes(cu_index_ + 3 * sizeof(uint32)); + phash_ = cu_index_ + 4 * sizeof(uint32); + pindex_ = phash_ + nslots_ * sizeof(uint64); + shndx_pool_ = pindex_ + nslots_ * sizeof(uint32); + if (shndx_pool_ >= cu_index_ + cu_index_size_) { + LOG(WARNING) << ".debug_cu_index is corrupt"; + version_ = 0; + } + } else { + LOG(WARNING) << "Unexpected version number in .dwp file."; + } +} + +void DwpReader::ReadDebugSectionsForCU(uint64 dwo_id, + SectionMap* sections) { + if (version_ == 1) { + int slot = LookupCU(dwo_id); + if (slot == -1) { + LOG(WARNING) << "dwo_id 0x" << dwo_id << + " not found in .dwp file."; + return; + } + + // The index table points to the section index pool, where we + // can read a list of section indexes for the debug sections + // for the CU whose dwo_id we are looking for. + int index = byte_reader_.ReadFourBytes(pindex_ + slot * sizeof(uint32)); + const char* shndx_list = shndx_pool_ + index * sizeof(uint32); + for (;;) { + if (shndx_list >= cu_index_ + cu_index_size_) { + LOG(WARNING) << ".debug_cu_index is corrupt"; + return; + } + unsigned int shndx = byte_reader_.ReadFourBytes(shndx_list); + shndx_list += sizeof(uint32); + if (shndx == 0) + break; + const char* section_name = elf_reader_->GetSectionName(shndx); + size_t section_size; + const char* section_data; + // We're only interested in these four debug sections. + // The section names in the .dwo file end with ".dwo", but we + // add them to the sections table with their normal names. 
+      if (strncmp(section_name, ".debug_abbrev", 13) == 0) {
+        section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+        sections->insert(make_pair(".debug_abbrev",
+                                   make_pair(section_data, section_size)));
+      } else if (strncmp(section_name, ".debug_info", 11) == 0) {
+        section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+        sections->insert(make_pair(".debug_info",
+                                   make_pair(section_data, section_size)));
+      } else if (strncmp(section_name, ".debug_str_offsets", 18) == 0) {
+        section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
+        sections->insert(make_pair(".debug_str_offsets",
+                                   make_pair(section_data, section_size)));
+      }
+    }
+    // .debug_str.dwo is shared by all CUs in the package file; add it
+    // under its normal (non-.dwo) name like the sections above.
+    sections->insert(make_pair(".debug_str",
+                               make_pair(string_buffer_, string_buffer_size_)));
+  }
+}
+
+// Looks up dwo_id in the .dwp file's CU index hash table (open addressing
+// with a secondary hash).  Returns the slot index, or -1 if not found.
+int DwpReader::LookupCU(uint64 dwo_id) {
+  // nslots_ is a power of two, so "& (nslots_ - 1)" is "% nslots_".
+  // NOTE(review): static_cast's template argument appears to have been
+  // dropped in this patch transcription (presumably static_cast<uint32>
+  // here and below) -- verify against the upstream autofdo source.
+  unsigned int slot = static_cast(dwo_id) & (nslots_ - 1);
+  uint64 probe = byte_reader_.ReadEightBytes(phash_ + slot * sizeof(uint64));
+  if (probe != 0 && probe != dwo_id) {
+    // The secondary hash is forced odd so it is coprime with the
+    // power-of-two table size, guaranteeing the probe sequence
+    // eventually visits every slot.
+    unsigned int secondary_hash =
+        (static_cast(dwo_id >> 32) & (nslots_ - 1)) | 1;
+    do {
+      slot = (slot + secondary_hash) & (nslots_ - 1);
+      probe = byte_reader_.ReadEightBytes(phash_ + slot * sizeof(uint64));
+    } while (probe != 0 && probe != dwo_id);
+  }
+  if (probe == 0)
+    return -1;  // Hit an empty slot: dwo_id is not in the table.
+  return slot;
+}
+
+LineInfo::LineInfo(const char* buffer, uint64 buffer_length,
+                   ByteReader* reader, LineInfoHandler* handler):
+    handler_(handler), reader_(reader), buffer_(buffer),
+    buffer_length_(buffer_length), malformed_(false) {
+  // Allocated lazily by ReadHeader; NULL signals "header not yet read".
+  header_.std_opcode_lengths = NULL;
+}
+
+uint64 LineInfo::Start() {
+  ReadHeader();
+  if (malformed()) {
+    // Return the buffer_length_ so callers will not process further
+    // in this section.
+ return buffer_length_; + } + ReadLines(); + return after_header_ - buffer_; +} + +bool LineInfo::AdvanceLinePtr(int incr, const char **lineptr) { + const char *buffer_end = buffer_ + buffer_length_; + if (*lineptr + incr >= buffer_end) { + // The '>=' comparison above is somewhat bogus: it assumes that we + // are going to necessarily read from the resulting lineptr. + // It would be better to check for 'lineptr < buffer_end' before + // reading from lineptr instead of after incrementing it. + malformed_ = true; + return false; + } + *lineptr += incr; + return true; +} + +// The header for a debug_line section is mildly complicated, because +// the line info is very tightly encoded. +void LineInfo::ReadHeader() { + const char* lineptr = buffer_; + size_t initial_length_size; + + const uint64 initial_length = ReadInitialLength(lineptr, reader_, + &initial_length_size); + + if (!AdvanceLinePtr(initial_length_size, &lineptr)) { + return; + } + header_.total_length = initial_length; + if (buffer_ + initial_length_size + header_.total_length + > buffer_ + buffer_length_) { + malformed_ = true; + return; + } + + // Address size *must* be set by CU ahead of time. 
+ if (reader_->AddressSize() == 0) { + malformed_ = true; + return; + } + + header_.version = reader_->ReadTwoBytes(lineptr); + if (!AdvanceLinePtr(2, &lineptr)) { + return; + } + if (header_.version < 2 || header_.version > 4) { + malformed_ = true; + return; + } + + header_.prologue_length = reader_->ReadOffset(lineptr); + if (!AdvanceLinePtr(reader_->OffsetSize(), &lineptr)) { + return; + } + + header_.min_insn_length = reader_->ReadOneByte(lineptr); + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + + if (header_.version >= 4) { + header_.max_ops_per_insn = reader_->ReadOneByte(lineptr); + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + } else { + header_.max_ops_per_insn = 1; + } + + header_.default_is_stmt = reader_->ReadOneByte(lineptr); + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + + header_.line_base = *reinterpret_cast(lineptr); + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + + header_.line_range = reader_->ReadOneByte(lineptr); + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + + header_.opcode_base = reader_->ReadOneByte(lineptr); + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + + header_.std_opcode_lengths = new vector; + header_.std_opcode_lengths->resize(header_.opcode_base + 1); + (*header_.std_opcode_lengths)[0] = 0; + for (int i = 1; i < header_.opcode_base; i++) { + (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr); + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + } + + // It is legal for the directory entry table to be empty. + if (*lineptr) { + uint32 dirindex = 1; + while (*lineptr) { + const char* dirname = lineptr; + handler_->DefineDir(dirname, dirindex); + if (!AdvanceLinePtr(strlen(dirname) + 1, &lineptr)) { + return; + } + dirindex++; + } + } + if (!AdvanceLinePtr(1, &lineptr)) { + return; + } + + // It is also legal for the file entry table to be empty. 
+  if (*lineptr) {
+    uint32 fileindex = 1;
+    size_t len;
+    while (*lineptr) {
+      // Each file entry is: NUL-terminated name, then three ULEB128s
+      // (directory index, modification time, file length).
+      const char* filename = lineptr;
+      if (!AdvanceLinePtr(strlen(filename) + 1, &lineptr)) {
+        return;
+      }
+
+      uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
+      if (!AdvanceLinePtr(len, &lineptr)) {
+        return;
+      }
+
+      uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
+      if (!AdvanceLinePtr(len, &lineptr)) {
+        return;
+      }
+
+      uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
+      if (!AdvanceLinePtr(len, &lineptr)) {
+        return;
+      }
+      handler_->DefineFile(filename, fileindex, dirindex, mod_time,
+                           filelength);
+      fileindex++;
+    }
+  }
+  // Step over the file table's terminating NUL and verify we have not
+  // run off the end of the buffer.
+  if (++lineptr > buffer_ + buffer_length_) {
+    malformed_ = true;
+    return;
+  }
+
+  after_header_ = lineptr;
+}
+
+/* static */
+// Decodes a single line-number-program opcode starting at "start",
+// updating *lsm and setting *len to the number of bytes consumed.
+// Returns true when a row should be emitted for the line table
+// (special opcodes and DW_LNS_copy / DW_LNE_end_sequence below),
+// false otherwise.
+bool LineInfo::ProcessOneOpcode(ByteReader* reader,
+                                LineInfoHandler* handler,
+                                const struct LineInfoHeader &header,
+                                const char* start,
+                                struct LineStateMachine* lsm,
+                                size_t* len,
+                                uintptr_t pc) {
+  size_t oplen = 0;
+  size_t templen;
+  uint8 opcode = reader->ReadOneByte(start);
+  oplen++;
+  start++;
+
+  // If the opcode is greater than the opcode_base, it is a special
+  // opcode. Most line programs consist mainly of special opcodes.
+ if (opcode >= header.opcode_base) { + opcode -= header.opcode_base; + const int64 advance_address = (opcode / header.line_range) + * header.min_insn_length; + const int64 advance_line = (opcode % header.line_range) + + header.line_base; + + lsm->address += advance_address; + lsm->line_num += advance_line; + lsm->basic_block = true; + *len = oplen; + return true; + } + + // Otherwise, we have the regular opcodes + switch (opcode) { + case DW_LNS_copy: { + lsm->basic_block = false; + *len = oplen; + return true; + } + + case DW_LNS_advance_pc: { + uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->address += header.min_insn_length * advance_address; + } + break; + case DW_LNS_advance_line: { + const int64 advance_line = reader->ReadSignedLEB128(start, &templen); + oplen += templen; + lsm->line_num += advance_line; + } + break; + case DW_LNS_set_file: { + const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->file_num = fileno; + } + break; + case DW_LNS_set_column: { + const uint64 colno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->column_num = colno; + } + break; + case DW_LNS_negate_stmt: { + lsm->is_stmt = !lsm->is_stmt; + } + break; + case DW_LNS_set_basic_block: { + lsm->basic_block = true; + } + break; + case DW_LNS_fixed_advance_pc: { + const uint16 advance_address = reader->ReadTwoBytes(start); + oplen += 2; + lsm->address += advance_address; + } + break; + case DW_LNS_const_add_pc: { + const int64 advance_address = header.min_insn_length + * ((255 - header.opcode_base) + / header.line_range); + lsm->address += advance_address; + } + break; + case DW_LNS_extended_op: { + const size_t extended_op_len = reader->ReadUnsignedLEB128(start, + &templen); + start += templen; + oplen += templen + extended_op_len; + + const uint64 extended_op = reader->ReadOneByte(start); + start++; + + switch (extended_op) { + case DW_LNE_end_sequence: { + 
lsm->end_sequence = true; + *len = oplen; + return true; + } + break; + case DW_LNE_set_address: { + uint64 address = reader->ReadAddress(start); + lsm->address = address; + } + break; + case DW_LNE_define_file: { + const char* filename = start; + + templen = strlen(filename) + 1; + start += templen; + + uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen); + start += templen; + + const uint64 mod_time = reader->ReadUnsignedLEB128(start, + &templen); + start += templen; + + const uint64 filelength = reader->ReadUnsignedLEB128(start, + &templen); + start += templen; + + if (handler) { + handler->DefineFile(filename, -1, dirindex, mod_time, + filelength); + } + } + break; + case DW_LNE_set_discriminator: { + const uint64 discriminator = reader->ReadUnsignedLEB128(start, + &templen); + lsm->discriminator = static_cast(discriminator); + } + break; + } + } + break; + + default: { + // Ignore unknown opcode silently + if (header.std_opcode_lengths) { + for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) { + size_t templen; + reader->ReadUnsignedLEB128(start, &templen); + start += templen; + oplen += templen; + } + } + } + break; + } + *len = oplen; + return false; +} + +void LineInfo::ReadLines() { + struct LineStateMachine lsm; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const char* lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. 
+ if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + const char* lineptr = after_header_; + while (lineptr < lengthstart + header_.total_length) { + lsm.Reset(header_.default_is_stmt); + while (!lsm.end_sequence) { + size_t oplength; + bool add_line = ProcessOneOpcode(reader_, handler_, header_, + lineptr, &lsm, &oplength, -1); + if (add_line) { + handler_->AddLine(lsm.address, lsm.file_num, lsm.line_num, + lsm.column_num, lsm.discriminator); + lsm.basic_block = false; + lsm.discriminator = 0; + } + lineptr += oplength; + } + } + + after_header_ = lengthstart + header_.total_length; +} + +} // namespace autofdo Index: lib/ProfileData/PerfConverter/symbolize/dwarf3ranges.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/dwarf3ranges.h @@ -0,0 +1,73 @@ +//=-- dwarf3ranges.h --------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_DWARF3RANGES_H_ +#define AUTOFDO_SYMBOLIZE_DWARF3RANGES_H_ + +#include +#include +#include + +#include "symbolize/bytereader.h" + +namespace autofdo { + +// This class represents a DWARF3 non-contiguous address range. The +// contents of an address range section are passed in +// (e.g. .debug_ranges) and subsequently, an interpretation of any +// offset in the section can be requested. 
+// Each decoded entry is a [start, stop) address pair.  Base-address
+// selection entries and end-of-list entries in the raw section data are
+// consumed by ReadRangeList and never appear in the output RangeList.
+class AddressRangeList {
+ public:
+  // NOTE(review): template arguments appear to have been dropped in this
+  // patch transcription (presumably pair<uint64, uint64> and
+  // vector<Range>) -- verify against the upstream autofdo source.
+  typedef pair Range;
+  typedef vector RangeList;
+  // Does not take ownership of "buffer" or "reader"; both must outlive
+  // this object.
+  AddressRangeList(const char* buffer,
+                   uint64 buffer_length,
+                   ByteReader* reader)
+      : reader_(reader),
+        buffer_(buffer),
+        buffer_length_(buffer_length) { }
+
+  // Decodes the range list at "offset" within the section, rebasing
+  // entries on "base", and appends the resulting ranges to *output.
+  void ReadRangeList(uint64 offset, uint64 base,
+                     RangeList* output);
+
+  // Smallest start address over all ranges, or 0 for an empty list.
+  static uint64 RangesMin(const RangeList *ranges) {
+    if (ranges->size() == 0)
+      return 0;
+
+    uint64 result = kint64max;
+    for (AddressRangeList::RangeList::const_iterator iter =
+             ranges->begin();
+         iter != ranges->end(); ++iter) {
+      result = min(result, iter->first);
+    }
+    return result;
+  }
+
+  // Largest end address over all ranges; 0 for an empty list.
+  static uint64 RangesMax(const RangeList *ranges) {
+    uint64 result = 0;
+    for (AddressRangeList::RangeList::const_iterator iter =
+             ranges->begin();
+         iter != ranges->end(); ++iter) {
+      result = max(result, iter->second);
+    }
+    return result;
+  }
+
+ private:
+  // The associated ByteReader that handles endianness issues for us
+  ByteReader* reader_;
+
+  // buffer is the buffer for our range info
+  const char* buffer_;
+  uint64 buffer_length_;
+  DISALLOW_COPY_AND_ASSIGN(AddressRangeList);
+};
+
+}  // namespace autofdo
+
+#endif  // AUTOFDO_SYMBOLIZE_DWARF3RANGES_H_
Index: lib/ProfileData/PerfConverter/symbolize/dwarf3ranges.cc
===================================================================
--- /dev/null
+++ lib/ProfileData/PerfConverter/symbolize/dwarf3ranges.cc
@@ -0,0 +1,43 @@
+//=-- dwarf3ranges.cc -------------------------------------------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "symbolize/dwarf3ranges.h"
+
+#include "symbolize/bytereader.h"
+#include "symbolize/bytereader-inl.h"
+
+namespace autofdo {
+
+// Decodes one DWARF .debug_ranges list starting at "offset".  Each raw
+// entry is two address-sized words: a pair whose first word is the
+// largest representable address is a base-address-selection entry, and
+// (0, 0) terminates the list.  Ordinary entries are rebased on "base"
+// (as updated by any selection entries) and appended to *ranges.
+void AddressRangeList::ReadRangeList(uint64 offset, uint64 base,
+                                     AddressRangeList::RangeList* ranges) {
+  uint8 width = reader_->AddressSize();
+
+  uint64 largest_address;
+  if (width == 4)
+    largest_address = 0xffffffffL;
+  else if (width == 8)
+    largest_address = 0xffffffffffffffffLL;
+  else
+    LOG(FATAL) << "width==" << width << " must be 4 or 8";
+
+  const char* pos = buffer_ + offset;
+  do {
+    // Every entry is two address-sized words; make sure the next one
+    // fits inside the section buffer before reading it.
+    CHECK((pos + 2*width) <= (buffer_ + buffer_length_));
+    uint64 start = reader_->ReadAddress(pos);
+    uint64 stop = reader_->ReadAddress(pos+width);
+    if (start == largest_address)
+      base = stop;  // Base-address-selection entry.
+    else if (start == 0 && stop == 0)
+      break;  // End-of-list entry.
+    else
+      ranges->push_back(make_pair(start+base, stop+base));
+    pos += 2*width;
+  } while (true);
+}
+
+}  // namespace autofdo
Index: lib/ProfileData/PerfConverter/symbolize/elf_reader.h
===================================================================
--- /dev/null
+++ lib/ProfileData/PerfConverter/symbolize/elf_reader.h
@@ -0,0 +1,147 @@
+//=-- elf_reader.h ----------------------------------------------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ElfReader handles reading in ELF. It can extract symbols from the
+// current process, which may be used to symbolize stack traces
+// without having to make a potentially dangerous call to fork().
+//
+// ElfReader dynamically allocates memory, so it is not appropriate to
+// use once the address space might be corrupted, such as during
+// process death.
+//
+// ElfReader supports both 32-bit and 64-bit ELF binaries.
+// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_ELF_READER_H__ +#define AUTOFDO_SYMBOLIZE_ELF_READER_H__ + +#include + +#include "llvm_port.h" + +namespace autofdo { + +class Elf32; +class Elf64; +template +class ElfReaderImpl; + +class ElfReader { + public: + explicit ElfReader(const string &path); + ~ElfReader(); + + // Parse the ELF prologue of this file and return whether it was + // successfully parsed and matches the word size and byte order of + // the current process. + bool IsNativeElfFile() const; + + // Similar to IsNativeElfFile but checks if it's a 32-bit ELF file. + bool IsElf32File() const; + + // Similar to IsNativeElfFile but checks if it's a 64-bit ELF file. + bool IsElf64File() const; + + // Checks if it's an ELF file of type ET_DYN (shared object file). + bool IsDynamicSharedObject(); + + class SymbolSink { + public: + virtual ~SymbolSink() {} + virtual void AddSymbol(const char *name, uint64 address, uint64 size) = 0; + }; + + // Like AddSymbols above, but with no address correction. + // Processes any SHT_SYMTAB section, followed by any SHT_DYNSYM section. + void VisitSymbols(SymbolSink *sink); + + // Like VisitSymbols above, but for a specific symbol binding/type. + // A negative value for the binding and type parameters means any + // binding or type. + void VisitSymbols(SymbolSink *sink, int symbol_binding, int symbol_type); + + // Like VisitSymbols above but can optionally export raw symbol values instead + // of adjusted ones. + void VisitSymbols(SymbolSink *sink, int symbol_binding, int symbol_type, + bool get_raw_symbol_values); + + // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD + // segments are present. This is the address an ELF image was linked + // (by static linker) to be loaded at. Usually (but not always) 0 for + // shared libraries and position-independent executables. 
+ uint64 VaddrOfFirstLoadSegment(); + + // Return the name of section "shndx". Returns NULL if the section + // is not found. + const char *GetSectionName(int shndx); + + // Get section "shndx" from the given ELF file. On success, return + // the pointer to the section and store the size in "size". + // On error, return NULL. The returned section data is only valid + // until the ElfReader gets destroyed. + const char *GetSectionByIndex(int shndx, size_t *size); + + // Get section with "section_name" (ex. ".text", ".symtab") in the + // given ELF file. On success, return the pointer to the section + // and store the size in "size". On error, return NULL. The + // returned section data is only valid until the ElfReader gets + // destroyed. + const char *GetSectionByName(const string §ion_name, size_t *size); + + // Gets the buildid of the binary. + string GetBuildId(); + + // This is like GetSectionByName() but it returns a lot of extra information + // about the section. The SectionInfo structure is almost identical to + // the typedef struct Elf64_Shdr defined in , but is redefined + // here so that the many short macro names in don't have to be + // added to our already cluttered namespace. + struct SectionInfo { + uint32 type; // Section type (SHT_xxx constant from elf.h). + uint64 flags; // Section flags (SHF_xxx constants from elf.h). + uint64 addr; // Section virtual address at execution. + uint64 offset; // Section file offset. + uint64 size; // Section size in bytes. + uint32 link; // Link to another section. + uint32 info; // Additional section information. + uint64 addralign; // Section alignment. + uint64 entsize; // Entry size if section holds a table. + }; + const char *GetSectionInfoByName(const string §ion_name, + SectionInfo *info); + + // Check if "path" is an ELF binary that has not been stripped of symbol + // tables. This function supports both 32-bit and 64-bit ELF binaries. 
+ static bool IsNonStrippedELFBinary(const string &path); + + // Check if "path" is an ELF binary that has not been stripped of debug + // info. Unlike IsNonStrippedELFBinary, this function will return + // false for binaries passed through "strip -S". + static bool IsNonDebugStrippedELFBinary(const string &path); + + private: + // Lazily initialize impl32_ and return it. + ElfReaderImpl *GetImpl32(); + // Ditto for impl64_. + ElfReaderImpl *GetImpl64(); + + // Path of the file we're reading. + const string path_; + // Read-only file descriptor for the file. May be -1 if there was an + // error during open. + int fd_; + ElfReaderImpl *impl32_; + ElfReaderImpl *impl64_; + + DISALLOW_COPY_AND_ASSIGN(ElfReader); +}; + +} // namespace autofdo + +#endif // AUTOFDO_SYMBOLIZE_ELF_READER_H__ Index: lib/ProfileData/PerfConverter/symbolize/elf_reader.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/elf_reader.cc @@ -0,0 +1,859 @@ +//=-- elf_reader.cc ---------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Code for reading in ELF files. +// +// For information on the ELF format, see +// http://www.x86.org/ftp/manuals/tools/elf.pdf +// +// I also liked: +// http://www.caldera.com/developers/gabi/1998-04-29/contents.html +// +// A note about types: When dealing with the file format, we use types +// like Elf32_Word, but in the public interfaces we treat all +// addresses as uint64. As a result, we should be able to symbolize +// 64-bit binaries from a 32-bit process (which we don't do, +// anyway). size_t should therefore be avoided, except where required +// by things like mmap(). 
+// +// Although most of this code can deal with arbitrary ELF files of +// either word size, the public ElfReader interface only examines +// files loaded into the current address space, which must all match +// __WORDSIZE. This code cannot handle ELF files with a non-native +// byte ordering. +// +// TODO(chatham): It would be nice if we could accomplish this task +// without using malloc(), so we could use it as the process is dying. +// +//===----------------------------------------------------------------------===// + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // needed for pread() +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "symbolize/elf_reader.h" + +namespace { + +// The lowest bit of an ARM symbol value is used to indicate a Thumb address. +const int kARMThumbBitOffset = 0; + +// Converts an ARM Thumb symbol value to a true aligned address value. +template +T AdjustARMThumbSymbolValue(const T& symbol_table_value) { + return symbol_table_value & ~(1 << kARMThumbBitOffset); +} +} // namespace + +namespace autofdo { + +template class ElfReaderImpl; + +// 32-bit and 64-bit ELF files are processed exactly the same, except +// for various field sizes. Elf32 and Elf64 encompass all of the +// differences between the two formats, and all format-specific code +// in this file is templated on one of them. +class Elf32 { + public: + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Word Word; + typedef Elf32_Sym Sym; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS32; + + // Given a symbol pointer, return the binding type (eg STB_WEAK). + static char Bind(const Elf32_Sym *sym) { + return ELF32_ST_BIND(sym->st_info); + } + // Given a symbol pointer, return the symbol type (eg STT_FUNC). 
+ static char Type(const Elf32_Sym *sym) { + return ELF32_ST_TYPE(sym->st_info); + } +}; + + +class Elf64 { + public: + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Word Word; + typedef Elf64_Sym Sym; + + // What should be in the EI_CLASS header. + static const int kElfClass = ELFCLASS64; + + static char Bind(const Elf64_Sym *sym) { + return ELF64_ST_BIND(sym->st_info); + } + static char Type(const Elf64_Sym *sym) { + return ELF64_ST_TYPE(sym->st_info); + } +}; + + +// ElfSectionReader mmaps a section of an ELF file ("section" is ELF +// terminology). The ElfReaderImpl object providing the section header +// must exist for the lifetime of this object. +// +// The motivation for mmaping individual sections of the file is that +// many Google executables are large enough when unstripped that we +// have to worry about running out of virtual address space. +template +class ElfSectionReader { + public: + ElfSectionReader(const string &path, int fd, + const typename ElfArch::Shdr §ion_header) + : header_(section_header) { + // Back up to the beginning of the page we're interested in. + const size_t additional = header_.sh_offset % getpagesize(); + const size_t offset_aligned = header_.sh_offset - additional; + section_size_ = header_.sh_size; + size_aligned_ = section_size_ + additional; + contents_aligned_ = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED, + fd, offset_aligned); + if (contents_aligned_ == MAP_FAILED) + PLOG(FATAL) << "Could not mmap " << path; + // Set where the offset really should begin. + contents_ = reinterpret_cast(contents_aligned_) + + (header_.sh_offset - offset_aligned); + } + + ~ElfSectionReader() { + munmap(contents_aligned_, size_aligned_); + } + + // Return the section header for this section. + typename ElfArch::Shdr const &header() const { return header_; } + + // Return memory at the given offset within this section. 
+ const char *GetOffset(typename ElfArch::Word bytes) const { + CHECK(contents_ != NULL); + return contents_ + bytes; + } + + const char *contents() const { return contents_; } + size_t section_size() const { return section_size_; } + + private: + // page-aligned file contents + void *contents_aligned_; + // pointer within contents_aligned_ to where the section data begins + const char *contents_; + // size of contents_aligned_ + size_t size_aligned_; + // size of contents. + size_t section_size_; + const typename ElfArch::Shdr header_; + + DISALLOW_EVIL_CONSTRUCTORS(ElfSectionReader); +}; + +// An iterator over symbols in a given section. It handles walking +// through the entries in the specified section and mapping symbol +// entries to their names in the appropriate string table (in +// another section). +template +class SymbolIterator { + public: + SymbolIterator(ElfReaderImpl *reader, + typename ElfArch::Word section_type) + : symbol_section_(reader->GetSectionByType(section_type)), + string_section_(NULL), + num_symbols_in_section_(0), + symbol_within_section_(0) { + CHECK(section_type == SHT_SYMTAB || section_type == SHT_DYNSYM); + + // If this section type doesn't exist, leave + // num_symbols_in_section_ as zero, so this iterator is already + // done(). + if (symbol_section_ != NULL) { + num_symbols_in_section_ = symbol_section_->header().sh_size / + symbol_section_->header().sh_entsize; + + // Symbol sections have sh_link set to the section number of + // the string section containing the symbol names. + CHECK_NE(symbol_section_->header().sh_link, 0); + string_section_ = reader->GetSection(symbol_section_->header().sh_link); + } + } + + // Return true iff we have passed all symbols in this section. + bool done() const { + return symbol_within_section_ >= num_symbols_in_section_; + } + + // Advance to the next symbol in this section. + // REQUIRES: !done() + void Next() { ++symbol_within_section_; } + + // Return a pointer to the current symbol. 
+ // REQUIRES: !done() + const typename ElfArch::Sym *GetSymbol() const { + CHECK(!done()); + return reinterpret_cast( + symbol_section_->GetOffset(symbol_within_section_ * + symbol_section_->header().sh_entsize)); + } + + // Return the name of the current symbol, NULL if it has none. + // REQUIRES: !done() + const char *GetSymbolName() const { + int name_offset = GetSymbol()->st_name; + if (name_offset == 0) + return NULL; + return string_section_->GetOffset(name_offset); + } + + private: + const ElfSectionReader *const symbol_section_; + const ElfSectionReader *string_section_; + int num_symbols_in_section_; + int symbol_within_section_; + DISALLOW_EVIL_CONSTRUCTORS(SymbolIterator); +}; + + +// ElfReader loads an ELF binary and can provide information about its +// contents. It is most useful for matching addresses to function +// names. It does not understand debugging formats (eg dwarf2), so it +// can't print line numbers. It takes a path to an elf file and a +// readable file descriptor for that file, which it does not assume +// ownership of. +template +class ElfReaderImpl { + public: + explicit ElfReaderImpl(const string &path, int fd) + : path_(path), + fd_(fd), + section_headers_(NULL), + program_headers_(NULL) { + CHECK_GE(fd_, 0); + string error; + CHECK(IsArchElfFile(fd, &error)) << " Could not parse file: " << error; + ParseHeaders(fd, path); + } + + ~ElfReaderImpl() { + for (int i = 0; i < sections_.size(); ++i) + delete sections_[i]; + delete [] section_headers_; + delete [] program_headers_; + } + + // Examine the headers of the file and return whether the file looks + // like an ELF file for this architecture. Takes an already-open + // file descriptor for the candidate file, reading in the prologue + // to see if the ELF file appears to match the current + // architecture. If error is non-NULL, it will be set with a reason + // in case of failure. 
+ static bool IsArchElfFile(int fd, string *error) { + unsigned char header[EI_NIDENT]; + if (pread(fd, header, sizeof(header), 0) != sizeof(header)) { + if (error != NULL) *error = "Could not read header"; + return false; + } + + if (memcmp(header, ELFMAG, SELFMAG) != 0) { + if (error != NULL) *error = "Missing ELF magic"; + return false; + } + + if (header[EI_CLASS] != ElfArch::kElfClass) { + if (error != NULL) *error = "Different word size"; + return false; + } + + int endian = 0; + if (header[EI_DATA] == ELFDATA2LSB) + endian = __LITTLE_ENDIAN; + else if (header[EI_DATA] == ELFDATA2MSB) + endian = __BIG_ENDIAN; + if (endian != __BYTE_ORDER) { + if (error != NULL) *error = "Different byte order"; + return false; + } + + return true; + } + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink *sink) { + VisitSymbols(section_type, sink, -1, -1, false); + } + + void VisitSymbols(typename ElfArch::Word section_type, + ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + for (SymbolIterator it(this, section_type); + !it.done(); it.Next()) { + const char *name = it.GetSymbolName(); + if (!name) continue; + const typename ElfArch::Sym *sym = it.GetSymbol(); + if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) && + (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) { + typename ElfArch::Sym symbol = *sym; + if (!get_raw_symbol_values) + AdjustSymbolValue(&symbol); + sink->AddSymbol(name, symbol.st_value, symbol.st_size); + } + } + } + + // Return an ElfSectionReader for the first section of the given + // type by iterating through all section headers. Returns NULL if + // the section type is not found. 
+ const ElfSectionReader *GetSectionByType( + typename ElfArch::Word section_type) { + for (int k = 0; k < GetNumSections(); ++k) { + if (section_headers_[k].sh_type == section_type) { + return GetSection(k); + } + } + return NULL; + } + + // Return the name of section "shndx". Returns NULL if the section + // is not found. + const char *GetSectionNameByIndex(int shndx) { + return GetSectionName(section_headers_[shndx].sh_name); + } + + // Return a pointer to section "shndx", and store the size in + // "size". Returns NULL if the section is not found. + const char *GetSectionContentsByIndex(int shndx, size_t *size) { + const ElfSectionReader *section = GetSection(shndx); + if (section != NULL) { + *size = section->section_size(); + return section->contents(); + } + return NULL; + } + + // Return a pointer to the first section of the given name by + // iterating through all section headers, and store the size in + // "size". Returns NULL if the section name is not found. + const char *GetSectionContentsByName(const string §ion_name, + size_t *size) { + for (int k = 0; k < GetNumSections(); ++k) { + const char *name = GetSectionName(section_headers_[k].sh_name); + if (name != NULL && section_name == name) { + const ElfSectionReader *section = GetSection(k); + if (section == NULL) { + return NULL; + } else { + *size = section->section_size(); + return section->contents(); + } + } + } + return NULL; + } + + // This is like GetSectionContentsByName() but it returns a lot of extra + // information about the section. 
+ const char *GetSectionInfoByName(const string §ion_name, + ElfReader::SectionInfo *info) { + for (int k = 0; k < GetNumSections(); ++k) { + const char *name = GetSectionName(section_headers_[k].sh_name); + if (name != NULL && section_name == name) { + const ElfSectionReader *section = GetSection(k); + if (section == NULL) { + return NULL; + } else { + info->type = section->header().sh_type; + info->flags = section->header().sh_flags; + info->addr = section->header().sh_addr; + info->offset = section->header().sh_offset; + info->size = section->header().sh_size; + info->link = section->header().sh_link; + info->info = section->header().sh_info; + info->addralign = section->header().sh_addralign; + info->entsize = section->header().sh_entsize; + return section->contents(); + } + } + } + return NULL; + } + + // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD + // segments are present. This is the address an ELF image was linked + // (by static linker) to be loaded at. Usually (but not always) 0 for + // shared libraries and position-independent executables. + uint64 VaddrOfFirstLoadSegment() const { + // Relocatable objects (of type ET_REL) do not have LOAD segments. + if (header_.e_type == ET_REL) { + return 0; + } + for (int i = 0; i < GetNumProgramHeaders(); ++i) { + if (program_headers_[i].p_type == PT_LOAD) { + return program_headers_[i].p_vaddr; + } + } + LOG(ERROR) << "Could not find LOAD from program header: " << path_; + return 0; + } + + // According to the LSB ("ELF special sections"), sections with debug + // info are prefixed by ".debug". The names are not specified, but they + // look like ".debug_line", ".debug_info", etc. 
+  // Returns true if this file carries DWARF debug info.  Per the LSB
+  // ("ELF special sections"), debug sections are prefixed ".debug"
+  // (".debug_line", ".debug_info", ...).
+  bool HasDebugSections() {
+    for (int k = 0; k < GetNumSections(); ++k) {
+      const char *name = GetSectionName(section_headers_[k].sh_name);
+      // BUG FIX: GetSectionName() returns NULL when the string table is
+      // missing or unreadable; the old code passed NULL straight to
+      // strncmp (undefined behavior).  Skip such sections instead.
+      if (name != NULL && strncmp(name, ".debug", strlen(".debug")) == 0)
+        return true;
+    }
+    return false;
+  }
+
+  // True for shared objects and PIE executables (ET_DYN).
+  bool IsDynamicSharedObject() const {
+    return header_.e_type == ET_DYN;
+  }
+
+ private:
+  // Address -> symbol entry pairs.
+  // NOTE(review): the template arguments below were garbled in transit and
+  // have been reconstructed; confirm against the upstream elf_reader.cc.
+  typedef vector<pair<uint64, const typename ElfArch::Sym *> > AddrToSymMap;
+
+  // Orders AddrToSymMap entries by address.
+  static bool AddrToSymSorter(const typename AddrToSymMap::value_type &lhs,
+                              const typename AddrToSymMap::value_type &rhs) {
+    return lhs.first < rhs.first;
+  }
+
+  // Compares AddrToSymMap entries for address equality.
+  static bool AddrToSymEquals(const typename AddrToSymMap::value_type &lhs,
+                              const typename AddrToSymMap::value_type &rhs) {
+    return lhs.first == rhs.first;
+  }
+
+  // Does this ELF file have too many sections to fit in the ELF header?
+  // In that case e_shnum reads SHN_UNDEF and the real counts live in the
+  // first section header (see ParseHeaders).
+  bool HasManySections() const {
+    return header_.e_shnum == SHN_UNDEF;
+  }
+
+  // Return the number of program headers, consulting the first section
+  // header when the count overflowed the 16-bit e_phnum field.
+  int GetNumProgramHeaders() const {
+    if (HasManySections() && header_.e_phnum == 0xffff &&
+        first_section_header_.sh_info != 0)
+      return first_section_header_.sh_info;
+    return header_.e_phnum;
+  }
+
+  // Return the number of sections, consulting the first section header
+  // when the count overflowed e_shnum.
+  int GetNumSections() const {
+    if (HasManySections())
+      return first_section_header_.sh_size;
+    return header_.e_shnum;
+  }
+
+  // Return the index of the section-header string table, handling the
+  // escape value used when the index overflowed e_shstrndx.
+  int GetStringTableIndex() const {
+    if (HasManySections()) {
+      if (header_.e_shstrndx == 0xffff)
+        return first_section_header_.sh_link;
+      else if (header_.e_shstrndx >= GetNumSections())
+        return 0;
+    }
+    return header_.e_shstrndx;
+  }
+
+  // Given an offset into the section header string table, return the
+  // section name, or NULL if the string table cannot be mapped.
+  const char *GetSectionName(typename ElfArch::Word sh_name) {
+    const ElfSectionReader<ElfArch> *shstrtab =
+        GetSection(GetStringTableIndex());
+    if (shstrtab != NULL) {
+      CHECK_GE(shstrtab->section_size(), sh_name);
+      return shstrtab->GetOffset(sh_name);
+    }
+    return NULL;
+  }
+
+  // Return an ElfSectionReader for the given section.  The reader will
+  // be freed when this object is destroyed.  Sections are mmapped
+  // lazily and cached in sections_.
+  const ElfSectionReader<ElfArch> *GetSection(int num) {
+    CHECK_LT(num, GetNumSections());
+    ElfSectionReader<ElfArch> *&reader = sections_[num];
+    if (reader == NULL)
+      reader = new ElfSectionReader<ElfArch>(path_, fd_,
+                                             section_headers_[num]);
+    return reader;
+  }
+
+  // Parse out the overall header information from the file and assert
+  // that it looks sane.  This contains information like the magic
+  // number and target architecture.  Returns false (after logging) on
+  // any malformed input.
+  bool ParseHeaders(int fd, const string &path) {
+    // Read in the global ELF header.
+    if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) {
+      LOG(ERROR) << "Could not read ELF header: " << path;
+      return false;
+    }
+
+    // Must be an executable, dynamic shared object or relocatable object
+    if (header_.e_type != ET_EXEC &&
+        header_.e_type != ET_DYN &&
+        header_.e_type != ET_REL) {
+      LOG(ERROR) << "Not an executable, shared object or relocatable object "
+                    "file: " << path;
+      return false;
+    }
+    // Need a section header.
+    if (header_.e_shoff == 0) {
+      LOG(ERROR) << "No section header: " << path;
+      return false;
+    }
+
+    if (header_.e_shnum == SHN_UNDEF) {
+      // The number of sections in the program header is only a 16-bit value.
+      // In the event of overflow (greater than SHN_LORESERVE sections),
+      // e_shnum will read SHN_UNDEF and the true number of section header
+      // table entries is found in the sh_size field of the first section
+      // header.
+      // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html
+      if (pread(fd, &first_section_header_, sizeof(first_section_header_),
+                header_.e_shoff) != sizeof(first_section_header_)) {
+        LOG(ERROR) << "Failed to read first section header: " << path;
+        return false;
+      }
+    }
+
+    // Dynamically allocate enough space to store the section headers
+    // and read them out of the file.
+    const int section_headers_size =
+        GetNumSections() * sizeof(*section_headers_);
+    // BUG FIX: new[] takes an *element count*, not a byte count.  The old
+    // code allocated GetNumSections() * sizeof(Shdr) elements, i.e.
+    // sizeof(Shdr) times too much memory.  section_headers_size stays the
+    // byte count for the pread below.
+    section_headers_ = new typename ElfArch::Shdr[GetNumSections()];
+    if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) !=
+        section_headers_size) {
+      LOG(ERROR) << "Could not read section headers: " << path;
+      return false;
+    }
+
+    // Dynamically allocate enough space to store the program headers
+    // and read them out of the file.
+    const int program_headers_size =
+        GetNumProgramHeaders() * sizeof(*program_headers_);
+    program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()];
+    // Deliberately non-fatal: some inputs lack readable program headers
+    // but the section data is still usable.
+    if (pread(fd, program_headers_, program_headers_size, header_.e_phoff) !=
+        program_headers_size) {
+      LOG(ERROR) << "Could not read program headers: " << path
+                 << " Continue anyway";
+    }
+
+    // Presize the sections array for efficiency.
+    sections_.resize(GetNumSections(), NULL);
+    return true;
+  }
+
+  // Architecture-specific post-processing of symbol values.
+  void AdjustSymbolValue(typename ElfArch::Sym *sym) {
+    switch (header_.e_machine) {
+      case EM_ARM:
+        // For ARM architecture, if the LSB of the function symbol offset is
+        // set, it indicates a Thumb function.  This bit should not be taken
+        // literally.  Clear it.
+        if (ElfArch::Type(sym) == STT_FUNC)
+          sym->st_value = AdjustARMThumbSymbolValue(sym->st_value);
+        break;
+      case EM_386:
+        // No adjustment needed for Intel x86 architecture.  However,
+        // explicitly define this case as we use it quite often.
+        break;
+      case EM_PPC:
+        // PowerPC architecture may need adjustment in the future.
+        break;
+      default:
+        break;
+    }
+  }
+
+  friend class SymbolIterator<ElfArch>;
+
+  // The file we're reading.
+  const string path_;
+  // Open file descriptor for path_.  Not owned by this object.
+  const int fd_;
+
+  // The global header of the ELF file.
+  typename ElfArch::Ehdr header_;
+
+  // The header of the first section.  This may be used to supplement the ELF
+  // file header.
+ typename ElfArch::Shdr first_section_header_; + + // Array of GetNumSections() section headers, allocated when we read + // in the global header. + typename ElfArch::Shdr *section_headers_; + + // Array of GetNumProgramHeaders() program headers, allocated when we read + // in the global header. + typename ElfArch::Phdr *program_headers_; + + // An array of pointers to ElfSectionReaders. Sections are + // mmaped as they're needed and not released until this object is + // destroyed. + vector*> sections_; + + DISALLOW_EVIL_CONSTRUCTORS(ElfReaderImpl); +}; + +// Copied from strings/strutil.h. Per chatham, +// this library should not depend on strings. + +static inline bool MyHasSuffixString(const string& str, const string& suffix) { + int len = str.length(); + int suflen = suffix.length(); + return (suflen <= len) && (str.compare(len-suflen, suflen, suffix) == 0); +} + +ElfReader::ElfReader(const string &path) + : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) { + // linux 2.6.XX kernel can show deleted files like this: + // /var/run/nscd/dbYLJYaE (deleted) + // and the kernel-supplied vdso and vsyscall mappings like this: + // [vdso] + // [vsyscall] + if (MyHasSuffixString(path, " (deleted)")) + return; + if (path == "[vdso]") + return; + if (path == "[vsyscall]") + return; + + fd_ = open(path.c_str(), O_RDONLY); + if (fd_ == -1) { + // Not ERROR, since this gets called with things like "[heap]". + PLOG(INFO) << "Could not open " << path_; + } +} + +ElfReader::~ElfReader() { + if (fd_ != -1) + close(fd_); + if (impl32_ != NULL) + delete impl32_; + if (impl64_ != NULL) + delete impl64_; +} + + +// The only word-size specific part of this file is IsNativeElfFile(). 
+#if __WORDSIZE == 32 +#define NATIVE_ELF_ARCH Elf32 +#elif __WORDSIZE == 64 +#define NATIVE_ELF_ARCH Elf64 +#else +#error "Invalid word size" +#endif + +template +static bool IsElfFile(const int fd, const string &path) { + if (fd < 0) + return false; + if (!ElfReaderImpl::IsArchElfFile(fd, NULL)) { + // No error message here. IsElfFile gets called many times. + return false; + } + return true; +} + +bool ElfReader::IsNativeElfFile() const { + return IsElfFile(fd_, path_); +} + +bool ElfReader::IsElf32File() const { + return IsElfFile(fd_, path_); +} + +bool ElfReader::IsElf64File() const { + return IsElfFile(fd_, path_); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink) { + VisitSymbols(sink, -1, -1); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type) { + VisitSymbols(sink, symbol_binding, symbol_type, false); +} + +void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, + int symbol_binding, + int symbol_type, + bool get_raw_symbol_values) { + if (IsElf32File()) { + GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } else if (IsElf64File()) { + GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, + get_raw_symbol_values); + } +} + +uint64 ElfReader::VaddrOfFirstLoadSegment() { + if (IsElf32File()) { + return GetImpl32()->VaddrOfFirstLoadSegment(); + } else if (IsElf64File()) { + return GetImpl64()->VaddrOfFirstLoadSegment(); + } else { + LOG(ERROR) << "not an elf binary: " << path_; + return 0; + } +} + +const char *ElfReader::GetSectionName(int shndx) { + if (IsElf32File()) { + return GetImpl32()->GetSectionNameByIndex(shndx); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionNameByIndex(shndx); + } else { + 
LOG(ERROR) << "not an elf binary: " << path_; + return NULL; + } +} + +const char *ElfReader::GetSectionByIndex(int shndx, size_t *size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByIndex(shndx, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByIndex(shndx, size); + } else { + LOG(ERROR) << "not an elf binary: " << path_; + return NULL; + } +} + +const char *ElfReader::GetSectionByName(const string §ion_name, + size_t *size) { + if (IsElf32File()) { + return GetImpl32()->GetSectionContentsByName(section_name, size); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionContentsByName(section_name, size); + } else { + LOG(ERROR) << "not an elf binary: " << path_; + return NULL; + } +} + +const char *ElfReader::GetSectionInfoByName(const string §ion_name, + SectionInfo *info) { + if (IsElf32File()) { + return GetImpl32()->GetSectionInfoByName(section_name, info); + } else if (IsElf64File()) { + return GetImpl64()->GetSectionInfoByName(section_name, info); + } else { + LOG(ERROR) << "not an elf binary: " << path_; + return NULL; + } +} + +string ElfReader::GetBuildId() { + size_t size; + + // Hex dump of section '.note.gnu.build-id': + // 0x00400280 04000000 10000000 03000000 474e5500 ............GNU. + // 0x00400290 76fe55ee a70df375 dd752205 334a51e0 v.U....u.u".3JQ. 
+ const char *build_id_section = ".note.gnu.build-id"; + const char *build_id_suffix = "00000000"; + const char *data = GetSectionByName(build_id_section, &size); + if (size != 32) { + LOG(ERROR) << "Malformed .note.gnu.build-id section."; + return ""; + } + + char build_id[33]; + const char *src = data + 16; + for (int i = 0; i < 16; i++) { + snprintf(&build_id[2*i], sizeof(build_id[i]) * 3, "%02x", src[i]); + } + return string(build_id) + build_id_suffix; +} + +bool ElfReader::IsDynamicSharedObject() { + if (IsElf32File()) { + return GetImpl32()->IsDynamicSharedObject(); + } else if (IsElf64File()) { + return GetImpl64()->IsDynamicSharedObject(); + } else { + LOG(ERROR) << "not an elf binary: " << path_; + return false; + } +} + +ElfReaderImpl *ElfReader::GetImpl32() { + if (impl32_ == NULL) { + impl32_ = new ElfReaderImpl(path_, fd_); + } + return impl32_; +} + +ElfReaderImpl *ElfReader::GetImpl64() { + if (impl64_ == NULL) { + impl64_ = new ElfReaderImpl(path_, fd_); + } + return impl64_; +} + +// Return true if file is an ELF binary of ElfArch, with unstripped +// debug info (debug_only=true) or symbol table (debug_only=false). +// Otherwise, return false. +template +static bool IsNonStrippedELFBinaryImpl(const string &path, const int fd, + bool debug_only) { + if (!ElfReaderImpl::IsArchElfFile(fd, NULL)) return false; + ElfReaderImpl elf_reader(path, fd); + return debug_only ? + elf_reader.HasDebugSections() + : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL); +} + +// Helper for the IsNon[Debug]StrippedELFBinary functions. 
+static bool IsNonStrippedELFBinaryHelper(const string &path, + bool debug_only) { + const int fd = open(path.c_str(), O_RDONLY); + if (fd == -1) { + return false; + } + + if (IsNonStrippedELFBinaryImpl(path, fd, debug_only) || + IsNonStrippedELFBinaryImpl(path, fd, debug_only)) { + close(fd); + return true; + } + close(fd); + return false; +} + +bool ElfReader::IsNonStrippedELFBinary(const string &path) { + return IsNonStrippedELFBinaryHelper(path, false); +} + +bool ElfReader::IsNonDebugStrippedELFBinary(const string &path) { + return IsNonStrippedELFBinaryHelper(path, true); +} +} // namespace autofdo Index: lib/ProfileData/PerfConverter/symbolize/functioninfo.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/functioninfo.h @@ -0,0 +1,221 @@ +//=-- functioninfo.h --------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions for a DWARF2/3 information +// collector that uses the DWARF2/3 reader interface to build a mapping +// of addresses to files, lines, and functions. +// This is much more of an example of using the readers than anything +// else, the linemap would use too much memory for a real program. 
+// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_FUNCTIONINFO_H__ +#define AUTOFDO_SYMBOLIZE_FUNCTIONINFO_H__ + +#include +#include +#include + +#include "symbolize/bytereader.h" +#include "symbolize/dwarf2reader.h" + +namespace autofdo { + +typedef pair DirectoryFilePair; + +struct FunctionInfo { + // Name of the function + const char *name; + // File containing this function + DirectoryFilePair file; + // Line number for start of function. + uint32 line; + // Beginning address for this function + uint64 lowpc; + // End address for this function. + uint64 highpc; +}; + +typedef map FunctionMap; + +struct LineIdentifier { + LineIdentifier() : file(), line(), discriminator() { } + LineIdentifier(const DirectoryFilePair &file_in, uint32 line_in, + uint32 discriminator_in) + : file(file_in), line(line_in), discriminator(discriminator_in) { } + + DirectoryFilePair file; + uint32 line; + uint32 discriminator; +}; + +typedef map AddressToLineMap; + +static int strcmp_maybe_null(const char *a, const char *b) { + if (a == 0 && b == 0) { + return 0; + } else if (a == 0) { + return -1; + } else if (b == 0) { + return 1; + } else { + return strcmp(a, b); + } +} + +// Compares two LineIdentifiers. This is useful when using +// LineIdentifiers as the key of an STL map. The comparison is +// lexicographic order first based on directory name, then filename, +// and finally line number. +// +// Note, this comparator treats the following cases differently: +// +// (("/", "foo/bar.cc"), 1) +// (("/foo", "bar.cc"), 1) +// (("/foo/baz", "../bar.cc"), 1) +// +// While all three point to the same file and line number, they will +// not be considered equal by the comparator. The order shown is the +// order returned by the comparator. However, since these patterns +// will not happen in google3 code, it should not matter. 
+struct LessThanLineIdentifier { + bool operator()(const LineIdentifier &line_a, + const LineIdentifier &line_b) const { + int cmp = strcmp_maybe_null(line_a.file.first, line_b.file.first); + if (cmp == 0) { + cmp = strcmp_maybe_null(line_a.file.second, line_b.file.second); + if (cmp == 0) { + cmp = (line_a.line - line_b.line); + if (cmp == 0) + cmp = (line_a.discriminator - line_b.discriminator); + } + } + return (cmp < 0); + } +}; + +// This class is a basic line info handler that fills in the dirs, +// file, and linemap passed into it with the data produced from the +// LineInfoHandler. +class CULineInfoHandler: public LineInfoHandler { + public: + CULineInfoHandler(FileVector* files, + DirectoryVector* dirs, + AddressToLineMap* linemap); + CULineInfoHandler(FileVector* files, + DirectoryVector* dirs, + AddressToLineMap* linemap, + const map *sampled_functions); + virtual ~CULineInfoHandler() { } + + // Called when we define a directory. We just place NAME into dirs_ + // at position DIR_NUM. + virtual void DefineDir(const char *name, uint32 dir_num); + + // Called when we define a filename. We just place + // concat(dirs_[DIR_NUM], NAME) into files_ at position FILE_NUM. + virtual void DefineFile(const char *name, int32 file_num, + uint32 dir_num, uint64 mod_time, uint64 length); + + + // Called when the line info reader has a new line, address pair + // ready for us. ADDRESS is the address of the code, FILE_NUM is + // the file number containing the code, LINE_NUM is the line number + // in that file for the code, and COLUMN_NUM is the column number + // the code starts at, if we know it (0 otherwise). + // + // If this function is called more than once with the same address, the + // information from the last call is stored. 
+ virtual void AddLine(uint64 address, uint32 file_num, uint32 line_num, + uint32 column_num, uint32 discriminator); + + + static string MergedFilename(const DirectoryFilePair& filename); + + private: + void Init(); + // Returns true if address should be added to linemap_. + bool ShouldAddAddress(uint64 address) const; + + AddressToLineMap* linemap_; + FileVector* files_; + DirectoryVector* dirs_; + const map *sampled_functions_; + DISALLOW_EVIL_CONSTRUCTORS(CULineInfoHandler); +}; + +class CUFunctionInfoHandler: public Dwarf2Handler { + public: + CUFunctionInfoHandler(FileVector* files, + DirectoryVector* dirs, + AddressToLineMap* linemap, + FunctionMap* offset_to_funcinfo, + FunctionMap* address_to_funcinfo, + CULineInfoHandler* linehandler, + const SectionMap& sections, + ByteReader* reader) + : files_(files), dirs_(dirs), linemap_(linemap), + offset_to_funcinfo_(offset_to_funcinfo), + address_to_funcinfo_(address_to_funcinfo), + linehandler_(linehandler), sections_(sections), + reader_(reader), current_function_info_(NULL) { } + + virtual ~CUFunctionInfoHandler() { } + + // Start to process a compilation unit at OFFSET from the beginning of the + // debug_info section. We want to see all compilation units, so we + // always return true. + + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version); + + // Start to process a DIE at OFFSET from the beginning of the + // debug_info section. We only care about function related DIE's. + virtual bool StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs); + + // Called when we have an attribute with unsigned data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. 
+ virtual void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + + // Called when we have an attribute with string data to give to + // our handler. The attribute is for the DIE at OFFSET from the + // beginning of compilation unit, has a name of ATTR, a form of + // FORM, and the actual data of the attribute is in DATA. + virtual void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char *data); + + // Called when finished processing the DIE at OFFSET. + // Because DWARF2/3 specifies a tree of DIEs, you may get starts + // before ends of the previous DIE, as we process children before + // ending the parent. + virtual void EndDIE(uint64 offset); + + private: + FileVector* files_; + DirectoryVector* dirs_; + AddressToLineMap* linemap_; + FunctionMap* offset_to_funcinfo_; + FunctionMap* address_to_funcinfo_; + CULineInfoHandler* linehandler_; + const SectionMap& sections_; + ByteReader* reader_; + FunctionInfo* current_function_info_; + DISALLOW_EVIL_CONSTRUCTORS(CUFunctionInfoHandler); +}; + +} // namespace autofdo +#endif // AUTOFDO_SYMBOLIZE_FUNCTIONINFO_H__ Index: lib/ProfileData/PerfConverter/symbolize/functioninfo.cc =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/functioninfo.cc @@ -0,0 +1,208 @@ +//=-- functioninfo.cc -------------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A simple example of how to use the DWARF2/3 reader to +// extract function and line information from the debug info. +// You very much do not want to just slurp up everything like this +// does. 
You more likely want to only process things you are +// interested in. +// It is sane to build functioninfo for the entire program at once in +// most cases. +// It is usually insane to build a line map that consists of 32 bytes +// per line in the original program :) +// +//===----------------------------------------------------------------------===// +#include "symbolize/functioninfo.h" + +#include +#include + +#include "symbolize/dwarf2enums.h" + +namespace autofdo { + +CULineInfoHandler::CULineInfoHandler(FileVector* files, + DirectoryVector* dirs, + AddressToLineMap* linemap) + : linemap_(linemap), files_(files), dirs_(dirs), + sampled_functions_(NULL) { + Init(); +} + +CULineInfoHandler::CULineInfoHandler( + FileVector* files, + DirectoryVector* dirs, + AddressToLineMap* linemap, + const map *sampled_functions) + : linemap_(linemap), files_(files), dirs_(dirs), + sampled_functions_(sampled_functions) { + Init(); +} +void CULineInfoHandler::Init() { + // The dirs and files are 1 indexed, so just make sure we put + // nothing in the 0 vector. + CHECK_EQ(dirs_->size(), 0); + CHECK_EQ(files_->size(), 0); + dirs_->push_back(""); + files_->push_back(make_pair(0, "")); +} + +bool CULineInfoHandler::ShouldAddAddress(uint64 address) const { + // Looks for the first entry above the given address, then decrement the + // iterator, then check that it's within the range [start, start + len). 
+ if (sampled_functions_ == NULL) { + return true; + } + map::const_iterator iter = sampled_functions_->upper_bound( + address); + if (iter == sampled_functions_->begin()) { + return false; + } + --iter; + return address < iter->first + iter->second; +} + +void CULineInfoHandler::DefineDir(const char *name, uint32 dir_num) { + // These should never come out of order, actually + CHECK_EQ(dir_num, dirs_->size()); + dirs_->push_back(name); +} + +void CULineInfoHandler::DefineFile(const char *name, + int32 file_num, uint32 dir_num, + uint64 mod_time, uint64 length) { + // These should never come out of order, actually. + CHECK_GE((int)dir_num, 0); + CHECK_LT(dir_num, dirs_->size()); + if ((unsigned)file_num == files_->size() || file_num == -1) { + files_->push_back(make_pair(dir_num, name)); + } else { + LOG(INFO) << "error in DefineFile"; + } +} + +void CULineInfoHandler::AddLine(uint64 address, uint32 file_num, + uint32 line_num, uint32 column_num, + uint32 discriminator) { + if (!ShouldAddAddress(address)) { + return; + } + if (file_num < files_->size()) { + const pair& file = (*files_)[file_num]; + if ((unsigned)file.first < dirs_->size()) { + DirectoryFilePair file_and_dir = make_pair((*dirs_)[file.first], + file.second); + LineIdentifier line_id(file_and_dir, line_num, discriminator); + (*linemap_)[address] = line_id; + } else { + LOG(INFO) << "error in AddLine (bad dir_num " << file.first << ")"; + } + } else { + LOG(INFO) << "error in AddLine (bad file_num " << file_num << ")"; + } +} + +string CULineInfoHandler::MergedFilename(const pair& filename) { + string dir = filename.first; + if (dir.empty()) + return filename.second; + else + return dir + "/" + filename.second; +} + +bool CUFunctionInfoHandler::StartCompilationUnit(uint64 offset, + uint8 address_size, + uint8 offset_size, + uint64 cu_length, + uint8 dwarf_version) { + return true; +} + + +// For function info, we only care about subprograms and inlined +// subroutines. 
For line info, the DW_AT_stmt_list lives in the +// compile unit tag. + +bool CUFunctionInfoHandler::StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { + switch (tag) { + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: { + current_function_info_ = new FunctionInfo; + current_function_info_->lowpc = current_function_info_->highpc = 0; + current_function_info_->name = ""; + current_function_info_->line = 0; + current_function_info_->file = make_pair("", ""); + offset_to_funcinfo_->insert(make_pair(offset, current_function_info_)); + FALLTHROUGH_INTENDED; + } + case DW_TAG_compile_unit: + return true; + default: + return false; + } + return false; +} + +// Only care about the name attribute for functions + +void CUFunctionInfoHandler::ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char *data) { + if (attr == DW_AT_name && current_function_info_) + current_function_info_->name = data; +} + +void CUFunctionInfoHandler::ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + if (attr == DW_AT_stmt_list) { + SectionMap::const_iterator iter = sections_.find(".debug_line"); + CHECK(iter != sections_.end()); + + LineInfo lireader(iter->second.first + data, iter->second.second - data, + reader_, linehandler_); + lireader.Start(); + } else if (current_function_info_) { + switch (attr) { + case DW_AT_low_pc: + current_function_info_->lowpc = data; + break; + case DW_AT_high_pc: + current_function_info_->highpc = data; + break; + case DW_AT_decl_line: + current_function_info_->line = data; + break; + case DW_AT_decl_file: + if (data < files_->size()) { + const FileVector::value_type& file = (*files_)[data]; + CHECK_LT((unsigned)file.first, dirs_->size()); + const char *dir = (*dirs_)[file.first]; + current_function_info_->file = make_pair(dir, file.second); + } else { + LOG(INFO) << "unexpected file_num " << data; + } + break; + default: + 
break; + } + } +} + +void CUFunctionInfoHandler::EndDIE(uint64 offset) { + if (current_function_info_ && current_function_info_->lowpc) + address_to_funcinfo_->insert(make_pair(current_function_info_->lowpc, + current_function_info_)); +} + +} // namespace autofdo Index: lib/ProfileData/PerfConverter/symbolize/line_state_machine.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/line_state_machine.h @@ -0,0 +1,42 @@ +//=-- line_state_machine.h --------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_LINE_STATE_MACHINE_H__ +#define AUTOFDO_SYMBOLIZE_LINE_STATE_MACHINE_H__ + +namespace autofdo { + +// This is the format of a DWARF2/3 line state machine that we process +// opcodes using. There is no need for anything outside the lineinfo +// processor to know how this works. +struct LineStateMachine { + void Reset(bool default_is_stmt) { + file_num = 1; + address = 0; + line_num = 1; + column_num = 0; + discriminator = 0; + is_stmt = default_is_stmt; + basic_block = false; + end_sequence = false; + } + + uint32 file_num; + uint64 address; + uint64 line_num; + uint32 column_num; + uint32 discriminator; + bool is_stmt; // stmt means statement. 
+ bool basic_block; + bool end_sequence; +}; + +} // namespace autofdo + + +#endif // AUTOFDO_SYMBOLIZE_LINE_STATE_MACHINE_H__ Index: lib/ProfileData/PerfConverter/symbolize/nonoverlapping_range_map.h =================================================================== --- /dev/null +++ lib/ProfileData/PerfConverter/symbolize/nonoverlapping_range_map.h @@ -0,0 +1,240 @@ +//=-- nonoverlapping_range_map.h --------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a simple map wrapper (NonOverlappingRangeMap) +// that maps address ranges to data. The map is used to support +// efficient lookup of debug information given a query address. +// +//===----------------------------------------------------------------------===// +#ifndef AUTOFDO_SYMBOLIZE_NONOVERLAPPING_RANGE_MAP_H_ +#define AUTOFDO_SYMBOLIZE_NONOVERLAPPING_RANGE_MAP_H_ + +#include +#include +#include +#include + +#include "symbolize/dwarf3ranges.h" + +namespace autofdo { + +struct RangeStartLt { + bool operator()(const AddressRangeList::Range& r1, + const AddressRangeList::Range& r2) const { + return r1.first < r2.first; + } +}; + +// NonOverlappingRangeMap maps address ranges +// (AddressRangeList::Range) to data. The ranges in the map are +// guaranteed to be non-overlapping. The map supports looking up an +// address (rather than a range) and the data for the containing +// range, if it exists, is returned. +// +// To guarantee that the ranges stored are non-overlapping, some +// checks are done upon insert. First, if a range is inserted that is +// completely contained within another range, it will break the other +// range into two pieces and replace the mapping for the overlapping +// region. 
If the inserted range intersects the middle of another
+// range in any other way, a CHECK will fail.
+//
+// Second, if a range is inserted that completely contains one or more
+// ranges, the new range only fills in the gaps. For example, if the
+// ranges [5,7) and [10,12) are already in the map, an insert of [0,15) is
+// identical to the following three inserts: [0,5), [7,10), [12,15).
+// This convenience behavior is useful when inserting data for
+// hierarchical structures in bottom-up order.
+template <class T>
+class NonOverlappingRangeMap {
+ public:
+  typedef map<AddressRangeList::Range, T, RangeStartLt> RangeMap;
+  typedef typename RangeMap::iterator Iterator;
+  typedef typename RangeMap::const_iterator ConstIterator;
+
+  NonOverlappingRangeMap();
+
+  void InsertRangeList(const AddressRangeList::RangeList& range_list,
+                       const T& value);
+  void InsertRange(uint64 low, uint64 high, const T& value);
+  Iterator Find(uint64 address);
+  ConstIterator Find(uint64 address) const;
+
+  Iterator Begin();
+  ConstIterator Begin() const;
+  Iterator End();
+  ConstIterator End() const;
+
+  bool Empty() const { return ranges_.empty(); }
+
+ private:
+  RangeMap ranges_;
+  template <class IteratorType>
+  IteratorType FindHelper(uint64 address, IteratorType iter,
+                          IteratorType end) const;
+  bool RangeStrictlyContains(const AddressRangeList::Range& outer,
+                             const AddressRangeList::Range& inner);
+  void SplitRange(Iterator split, uint64 low, uint64 high, const T& value);
+  DISALLOW_COPY_AND_ASSIGN(NonOverlappingRangeMap);
+};
+
+template <class T>
+NonOverlappingRangeMap<T>::NonOverlappingRangeMap() { }
+
+template <class T>
+void NonOverlappingRangeMap<T>::InsertRangeList(
+    const AddressRangeList::RangeList& range_list, const T& value) {
+
+  for (AddressRangeList::RangeList::const_iterator iter = range_list.begin();
+       iter != range_list.end(); ++iter) {
+    InsertRange(iter->first, iter->second, value);
+  }
+}
+
+template <class T>
+void NonOverlappingRangeMap<T>::InsertRange(uint64 low, uint64 high,
+                                            const T& value) {
+  if (low == high)
+    return;
+
+  Iterator insert_point = 
ranges_.lower_bound(make_pair(low, high)); + + if (insert_point != ranges_.begin()) { + Iterator predecessor = insert_point; + --predecessor; + + if (RangeStrictlyContains(predecessor->first, make_pair(low, high))) { + SplitRange(predecessor, low, high, value); + return; + } + + // No containment. Check that the predecessor does not overlap with range + CHECK(predecessor->first.second <= low); + } + + if (insert_point != ranges_.end() && + RangeStrictlyContains(insert_point->first, make_pair(low, high))) { + SplitRange(insert_point, low, high, value); + return; + } + + while (low < high) { + if (insert_point == ranges_.end()) { + ranges_.insert(make_pair(make_pair(low, high), value)); + break; + } else { + if (low != insert_point->first.first) { + // low < insert_point->first.first by the invariants of lower_bound + uint64 from = low; + uint64 to = min(high, insert_point->first.first); + + // Ensure that insert does not end in the middle of another range + CHECK(to == high || high >= insert_point->first.second); + CHECK(from < to); + pair insert_status = + ranges_.insert(make_pair(make_pair(from, to), value)); + CHECK(insert_status.second); + } + low = min(high, insert_point->first.second); + } + ++insert_point; + } +} + +template +typename NonOverlappingRangeMap::Iterator +NonOverlappingRangeMap::Find(uint64 address) { + AddressRangeList::Range singleton_range = make_pair(address, address + 1); + Iterator iter = ranges_.lower_bound(singleton_range); + return FindHelper(address, iter, ranges_.end()); +} + +template +typename NonOverlappingRangeMap::ConstIterator +NonOverlappingRangeMap::Find(uint64 address) const { + AddressRangeList::Range singleton_range = make_pair(address, address + 1); + ConstIterator iter = ranges_.lower_bound(singleton_range); + return FindHelper(address, iter, ranges_.end()); +} + +template +template +IteratorType NonOverlappingRangeMap::FindHelper(uint64 address, + IteratorType iter, + IteratorType end) const { + if (iter == end || 
iter->first.first != address) { + if (iter == ranges_.begin()) + return end; + --iter; + } + + if (iter->first.second > address) + return iter; + return end; +} + +template +typename NonOverlappingRangeMap::Iterator +NonOverlappingRangeMap::Begin() { + return ranges_.begin(); +} + +template +typename NonOverlappingRangeMap::ConstIterator +NonOverlappingRangeMap::Begin() const { + return ranges_.begin(); +} + +template +typename NonOverlappingRangeMap::Iterator +NonOverlappingRangeMap::End() { + return ranges_.end(); +} + +template +typename NonOverlappingRangeMap::ConstIterator +NonOverlappingRangeMap::End() const { + return ranges_.end(); +} + +template +bool NonOverlappingRangeMap::RangeStrictlyContains( + const AddressRangeList::Range& outer, + const AddressRangeList::Range& inner) { + return (outer.first <= inner.first) && (outer.second >= inner.second) && + ((outer.first != inner.first) || (outer.second != inner.second)); +} + +template +void NonOverlappingRangeMap::SplitRange(Iterator split, uint64 low, + uint64 high, const T& value) { + const AddressRangeList::Range old_range = split->first; + const T old_value = split->second; + pair insert_status; + + ranges_.erase(split); + + if (low != old_range.first) { + insert_status = + ranges_.insert(make_pair(make_pair(old_range.first, low), old_value)); + CHECK(insert_status.second); + } + + insert_status = ranges_.insert(make_pair(make_pair(low, high), value)); + CHECK(insert_status.second); + + if (high != old_range.second) { + insert_status = + ranges_.insert(make_pair(make_pair(high, old_range.second), old_value)); + CHECK(insert_status.second); + } +} + +} // namespace autofdo + +#endif // AUTOFDO_SYMBOLIZE_NONOVERLAPPING_RANGE_MAP_H_ Index: test/tools/llvm-profdata/perf-basic.test =================================================================== --- /dev/null +++ test/tools/llvm-profdata/perf-basic.test @@ -0,0 +1,30 @@ +# To regenerate, compile the following program with -gmlt +# +# #define N 10000000 +# 
volatile double A; +# int main() { +# for (int i = 0; i < N; i++) { +# A *= i / 32; +# } +# return 0; +# } +# +# You also need a working copy of Linux Perf (3.13 and newer). +# +# $ clang -gmlt -o perf-basic perf-basic.cc +# $ perf record -b ./perf-basic +# $ llvm-profdata convert --binary=./perf-basic -o - perf.data +# main:56700:0 +# 1: 810 +# 1.2: 810 +# 2: 810 +# +# Note that the counters will likely be different numbers. This is +# normal and expected. + +RUN: llvm-profdata convert --binary=%p/Inputs/perf-basic -o - %p/Inputs/perf-basic.data | FileCheck %s -check-prefix=CHECK + +CHECK: main:56700:0 +CHECK: 1: 810 +CHECK: 1.2: 810 +CHECK: 2: 810 Index: test/tools/llvm-profdata/perf-cond.test =================================================================== --- /dev/null +++ test/tools/llvm-profdata/perf-cond.test @@ -0,0 +1,41 @@ +# To regenerate, compile the following program with -gmlt +# +# #define N 10000000 +# volatile double A; +# int main() { +# for (int i = 0; i < N; i++) { +# if (i < N / 20) +# A *= i / 32; +# else +# A /= i - 49; +# } +# return 0; +# } +# +# You also need a working copy of Linux Perf (3.13 and newer). +# +# $ clang -gmlt -o perf-cond perf-cond.cc +# $ perf record -b ./perf-cond +# $ llvm-profdata convert --binary=./perf-cond -o - perf.data +# main:18700:0 +# 1: 270 +# 1.2: 270 +# 2: 270 +# 3: 20 +# 5: 250 +# 6: 270 +# +# Note that the counters will likely be different numbers. This is +# normal and expected. + +RUN: llvm-profdata convert --binary=%p/Inputs/perf-cond -o - %p/Inputs/perf-cond.data | FileCheck %s -check-prefix=CHECK + +CHECK: main:18700:0 +CHECK: 1: 270 +CHECK: 1.2: 270 +CHECK: 2: 270 + +# The conditional at line 3 is only executed about 20% of the time. 
+CHECK: 3: 20 +CHECK: 5: 250 +CHECK: 6: 270 Index: test/tools/llvm-profdata/perf-fncall.test =================================================================== --- /dev/null +++ test/tools/llvm-profdata/perf-fncall.test @@ -0,0 +1,55 @@ +# To regenerate, compile the following program with -gmlt +# +# #define N 10000000 +# volatile double A; +# +# void foo(int i) { +# if (i < N / 20) +# A *= i / 32; +# else +# A /= i - 49; +# } +# +# int main() { +# for (int i = 0; i < N; i++) foo(i); +# return 0; +# } +# +# You also need a working copy of Linux Perf (3.13 and newer). +# +# $ clang -gmlt -o perf-fncall perf-fncall.cc +# $ perf record -b ./perf-fncall +# $ llvm-profdata convert --binary=./perf-fncall -o - perf.data +# _Z3fooi:9808:209 +# 0: 209 +# 1: 209 +# 2: 15 +# 4: 163 +# 5: 183 +# main:6271:0 +# 1.2: 215 _Z3fooi:220 +# 1.3: 218 +# 1.4: 215 +# +# Note that the counters will likely be different numbers. This is +# normal and expected. + +RUN: llvm-profdata convert --binary=%p/Inputs/perf-fncall -o - %p/Inputs/perf-fncall.data | FileCheck %s + +CHECK: _Z3fooi:9808:209 + +# Line numbers are relative to the start of the function (line 0 is +# the line containing the function name). +CHECK: 0: 209 +CHECK: 1: 209 + +# The true branch of this conditional executes about 20% of the time. +CHECK: 2: 15 +CHECK: 4: 163 +CHECK: 5: 183 +CHECK: main:6271:0 + +# Indicates a call to foo(i) on discriminator #2 of line 1. 
+CHECK: 1.2: 215 _Z3fooi:220 +CHECK: 1.3: 218 +CHECK: 1.4: 215 Index: tools/llvm-profdata/CMakeLists.txt =================================================================== --- tools/llvm-profdata/CMakeLists.txt +++ tools/llvm-profdata/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS profiledata support) +set(LLVM_LINK_COMPONENTS profiledata support perfconverter) add_llvm_tool(llvm-profdata llvm-profdata.cpp Index: tools/llvm-profdata/LLVMBuild.txt =================================================================== --- tools/llvm-profdata/LLVMBuild.txt +++ tools/llvm-profdata/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-profdata parent = Tools -required_libraries = ProfileData Support +required_libraries = ProfileData Support PerfConverter Index: tools/llvm-profdata/Makefile =================================================================== --- tools/llvm-profdata/Makefile +++ tools/llvm-profdata/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llvm-profdata -LINK_COMPONENTS := profiledata support +LINK_COMPONENTS := profiledata support perfconverter # This tool has no plugins, optimize startup time. 
TOOL_NO_EXPORTS := 1
Index: tools/llvm-profdata/llvm-profdata.cpp
===================================================================
--- tools/llvm-profdata/llvm-profdata.cpp
+++ tools/llvm-profdata/llvm-profdata.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/ProfileData/SampleProfileConverter.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
@@ -150,6 +151,64 @@
   return 0;
 }
 
+int convert_main(int argc, const char *argv[]) {
+  cl::opt<std::string> InputProfile(cl::Positional, cl::Required,
+                                    cl::desc("<input-profile-file>"));
+
+  using namespace samplepgo;
+
+  cl::opt<InputProfileKind> ProfileType(
+      "profile-type", cl::desc("Input profile type:"), cl::init(LinuxPerf),
+      cl::values(
+          clEnumVal(LinuxPerf, "Linux Perf (https://perf.wiki.kernel.org/)"),
+          clEnumValEnd));
+
+  cl::opt<std::string> OutputProfile(
+      "out", cl::desc("Output profile file name (Use '-' to emit to stdout)"));
+  cl::alias OutputProfileA("o", cl::desc("Alias for --out"),
+                           cl::aliasopt(OutputProfile));
+
+  cl::opt<std::string> InputBinary(
+      "binary", cl::desc("Input binary executable (in ELF format) to read "
+                         "(this executable must be compiled with -gmlt)"));
+
+  const char *usage = "\nConverts a sample profile collected with Linux Perf "
+                      "(https://perf.wiki.kernel.org/)\n"
+                      "into an LLVM sample profile. 
The output file can be "
+                      "used with Clang's -fprofile-sample-use flag.\n"
+                      "\nSample usage:\n"
+                      "\n$ llvm-profdata convert --out=perf.llvm --binary=a.out "
+                      "--profile-type=LinuxPerf perf.data\n";
+
+  cl::ParseCommandLineOptions(argc, argv, usage);
+
+  if (InputProfile.empty()) {
+    errs() << usage;
+    errs() << "Need a name for the input profile file.\n";
+    return 1;
+  }
+
+  if (OutputProfile.empty()) {
+    errs() << usage;
+    errs() << "Need a name for the output LLVM profile file.\n";
+    errs() << "Use --out to specify an output file.\n";
+    return 1;
+  }
+
+  if (InputBinary.empty()) {
+    errs() << usage;
+    errs() << "Need a name for the ELF executable.\n";
+    errs() << "Use --binary to specify an input executable file.\n";
+    return 1;
+  }
+
+  SampleProfileConverter Converter(InputBinary);
+  if (!Converter.createProfile(InputProfile, ProfileType, OutputProfile))
+    return -1;
+
+  return 0;
+}
+
 int main(int argc, const char *argv[]) {
   // Print a stack trace if we signal out.
   sys::PrintStackTraceOnErrorSignal();
@@ -164,6 +223,8 @@
     func = merge_main;
   else if (strcmp(argv[1], "show") == 0)
     func = show_main;
+  else if (strcmp(argv[1], "convert") == 0)
+    func = convert_main;
 
   if (func) {
     std::string Invocation(ProgName.str() + " " + argv[1]);
@@ -178,7 +239,7 @@
     errs() << "OVERVIEW: LLVM profile data tools\n\n"
            << "USAGE: " << ProgName << " <command> [args...]\n"
            << "USAGE: " << ProgName << " <command> -help\n\n"
-           << "Available commands: merge, show\n";
+           << "Available commands: merge, show, convert\n";
     return 0;
   }
 }
@@ -188,6 +249,6 @@
   else
     errs() << ProgName << ": Unknown command!\n";
 
-  errs() << "USAGE: " << ProgName << " <command> [args...]\n";
+  errs() << "USAGE: " << ProgName << " <command> [args...]\n";
   return 1;
 }