diff --git a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h --- a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h @@ -109,6 +109,10 @@ /// file position. uint64_t tell(); + llvm::raw_pwrite_stream &get_stream() { + return OS; + } + private: FileWriter(const FileWriter &rhs) = delete; void operator=(const FileWriter &rhs) = delete; diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,229 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H + +#include +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +namespace llvm { + +namespace gsym { +class FileWriter; + +/// GsymCreator is used to emit GSYM data to a stand alone file or section +/// within a file. +/// +/// The GsymCreator is designed to be used in 3 stages: +/// - Create FunctionInfo objects and add them +/// - Finalize the GsymCreator object +/// - Save to file or section +/// +/// The first stage involves creating FunctionInfo objects from another source +/// of information like compiler debug info metadata, DWARF or Breakpad files. 
+/// Any strings in the FunctionInfo or contained information, like InlineInfo +/// or LineTable objects, should get the string table offsets by calling +/// GsymCreator::insertString(...). Any file indexes that are needed should be +/// obtained by calling GsymCreator::insertFile(...). All of the function calls +/// in GsymCreator are thread safe. This allows multiple threads to create and +/// add FunctionInfo objects while parsing debug information. +/// +/// Once all of the FunctionInfo objects have been added, the +/// GsymCreator::finalize(...) must be called prior to saving. This function +/// will sort the FunctionInfo objects, finalize the string table, and do any +/// other passes on the information needed to prepare the information to be +/// saved. +/// +/// Once the object has been finalized, it can be saved to a file or section. +/// +/// ENCODING +/// +/// GSYM files are designed to be memory mapped into a process as shared, read +/// only data, and used as is. +/// +/// The GSYM file format when in a stand alone file consists of: +/// - Header +/// - Address Table +/// - Function Info Offsets +/// - File Table +/// - String Table +/// - Function Info Data +/// +/// HEADER +/// +/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h". +/// +/// ADDRESS TABLE +/// +/// The address table immediately follows the header in the file and consists +/// of Header.NumAddresses address offsets. These offsets are sorted and can be +/// binary searched for efficient lookups. Addresses in the address table are +/// stored as offsets from a 64 bit base address found in Header.BaseAddress. +/// This lets the address table contain 8, 16, or 32 bit offsets. This allows +/// the address table to not require full 64 bit addresses for each address. +/// The resulting GSYM size is smaller and causes fewer pages to be touched +/// during address lookups when the address table is smaller. 
The size of the +/// address offsets in the address table is specified in the header in +/// Header.AddrOffSize. The first offset in the address table is aligned to +/// Header.AddrOffSize alignment to ensure efficient access when loaded into +/// memory. +/// +/// FUNCTION INFO OFFSETS TABLE +/// +/// The function info offsets table immediately follows the address table and +/// consists of Header.NumAddresses 32 bit file offsets: one for each address +/// in the address table. This data is aligned to a 4 byte boundary. The +/// offsets in this table are the relative offsets from the start offset of the +/// GSYM header and point to the function info data for each address in the +/// address table. Keeping this data separate from the address table helps to +/// reduce the number of pages that are touched when address lookups occur on a +/// GSYM file. +/// +/// FILE TABLE +/// +/// The file table immediately follows the function info offsets table. The +/// encoding of the FileTable is: +/// +/// struct FileTable { +/// uint32_t Count; +/// FileEntry Files[]; +/// }; +/// +/// The file table starts with a 32 bit count of the number of files that are +/// used in all of the function info, followed by that number of FileEntry +/// structures. The file table is aligned to a 4 byte boundary. Each file in +/// the file table is represented with a FileEntry structure. +/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details. +/// +/// STRING TABLE +/// +/// The string table follows the file table in stand alone GSYM files and +/// contains all strings for everything contained in the GSYM file. Any string +/// data should be added to the string table and any references to strings +/// inside GSYM information must be stored as 32 bit string table offsets into +/// this string table. The string table always starts with an empty string at +/// offset zero and is followed by any strings needed by the GSYM information. 
+/// The start of the string table is not aligned to any boundary. +/// +/// FUNCTION INFO DATA +/// +/// The function info data is the payload that contains information about the +/// address that is being looked up. It contains all of the encoded +/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an +/// entry in the Function Info Offsets Table. For details on the exact encoding +/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h". +class GsymCreator { + // Private member variables require Mutex protections + mutable std::recursive_mutex Mutex; + std::vector Funcs; + StringTableBuilder StrTab; + DenseMap FileEntryToIndex; + std::vector Files; + std::vector UUID; + bool Finalized = false; + +public: + + GsymCreator(); + + /// Save a GSYM file to a stand alone file. + /// + /// \param Path The file path to save the GSYM file to. + /// \param ByteOrder The endianness to use when saving the file. + /// \returns An error object that indicates success or failure of the save. + llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const; + + /// Encode a GSYM into the file writer stream at the current position. + /// + /// \param O The stream to save the binary data to + /// \returns An error object that indicates success or failure of the save. + llvm::Error encode(FileWriter &O) const; + + /// Insert a string into the GSYM string table. + /// + /// All strings used by GSYM files must be uniqued by adding them to this + /// string pool and using the returned offset for any string values. + /// + /// \param S The string to insert into the string table. + /// \returns The unique 32 bit offset into the string table. + uint32_t insertString(StringRef S); + + /// Insert a file into this GSYM creator. + /// + /// Inserts a file by adding a FileEntry into the "Files" member variable if + /// the file has not already been added. 
The file path is split into + /// directory and filename which are both added to the string table. This + /// allows paths to be stored efficiently by reusing the directories that are + /// common between multiple files. + /// + /// \param Path The path to the file to insert. + /// \param Style The path style for the "Path" parameter. + /// \returns The unique file index for the inserted file. + uint32_t insertFile(StringRef Path, + llvm::sys::path::Style = llvm::sys::path::Style::native); + + /// Add a function info to this GSYM creator. + /// + /// All information in the FunctionInfo object must use the + /// GsymCreator::insertString(...) function when creating string table + /// offsets for names and other strings. + /// + /// \param FI The function info object to emplace into our functions list. + void addFunctionInfo(FunctionInfo &&FI); + + /// Finalize the data in the GSYM creator prior to saving the data out. + /// + /// Finalize must be called after all FunctionInfo objects have been added + /// and before GsymCreator::save() is called. + /// + /// \param OS Output stream to report duplicate function infos, overlapping + /// function infos, and function infos that were merged or removed. + /// \returns An error object that indicates success or failure of the + /// finalize. + llvm::Error finalize(llvm::raw_ostream &OS); + + /// Set the UUID value. + /// + /// \param UUIDBytes The new UUID bytes. + void setUUID(llvm::ArrayRef UUIDBytes) { + UUID.assign(UUIDBytes.begin(), UUIDBytes.end()); + } + + /// Thread safe iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. + void forEachFunctionInfo( + std::function const &Callback); + + /// Thread safe const iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. 
+ void forEachFunctionInfo( + std::function const &Callback) const; + +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,228 @@ +//===- GsymReader.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H +#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H + + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorOr.h" + +#include +#include +#include +#include +#include + +namespace llvm { +class MemoryBuffer; +class raw_ostream; + +namespace gsym { + +/// GsymReader is used to read GSYM data from a file or buffer. +/// +/// This class is optimized for very quick lookups when the endianness matches +/// the host system. The Header, address table, address info offsets, and file +/// table is designed to be mmap'ed as read only into memory and used without +/// any parsing needed. If the endianness doesn't match, we swap these objects +/// and tables into GsymReader::SwappedData and then point our header and +/// ArrayRefs to this swapped internal data. +/// +/// GsymReader objects must use one of the static functions to create an +/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...). 
+ +class GsymReader { + GsymReader(std::unique_ptr Buffer); + llvm::Error parse(); + + std::unique_ptr MemBuffer; + StringRef GsymBytes; + llvm::support::endianness Endian; + const Header *Hdr = nullptr; + ArrayRef AddrOffsets; + ArrayRef AddrInfoOffsets; + ArrayRef Files; + StringTable StrTab; + /// When the GSYM file's endianness doesn't match the host system then + /// we must decode all data structures that need to be swapped into + /// local storage and point the ArrayRef objects above to these swapped + /// copies. + struct SwappedData { + Header Hdr; + std::vector AddrOffsets; + std::vector AddrInfoOffsets; + std::vector Files; + }; + std::unique_ptr Swap; + +public: + GsymReader(GsymReader &&RHS); + ~GsymReader(); + + /// Construct a GsymReader from a file on disk. + /// + /// \param Path The file path of the GSYM file to read. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected openFile(StringRef Path); + + /// Construct a GsymReader from a buffer. + /// + /// \param Bytes A set of bytes that will be copied and owned by the + /// returned object on success. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected copyBuffer(StringRef Bytes); + + /// Access the GSYM header. + /// \returns A native endian version of the GSYM header. + const Header &getHeader() const; + + /// Get the full function info for an address. + /// + /// \param Addr A virtual address from the original object file to lookup. + /// \returns An expected FunctionInfo that contains the function info object + /// or an error object that indicates reason for failing to lookup the + /// address. + llvm::Expected getFunctionInfo(uint64_t Addr) const; + + /// Get a string from the string table. + /// + /// \param Offset The string table offset for the string to retrieve. 
+ /// \returns The string from the string table. + StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } + +protected: + /// Gets an address from the address table. + /// + /// Addresses are stored as offsets from the gsym::Header::BaseAddress. + /// + /// \param Index An index into the address table. + /// \returns A resolved virtual address for the address in the address table + /// or llvm::None if Index is out of bounds. + Optional getAddress(size_t Index) const; + + /// Get a file entry for the supplied file index. + /// + /// Used to convert any file indexes in the FunctionInfo data back into + /// files. This function can be used for iteration, but is more commonly used + /// for random access when doing lookups. + /// + /// \param Index An index into the file table. + /// \returns An optional FileInfo that will be valid if the file index is + /// valid, or llvm::None if the file index is out of bounds. + Optional getFile(uint32_t Index) const { + if (Index < Files.size()) + return Files[Index]; + return llvm::None; + } + + /// Get an appropriate address offsets array. + /// + /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8 + /// byte offsets from the gsym::Header::BaseAddress. The table is stored + /// internally as an array of bytes that are in the correct endianness. When + /// we access this table we must get an array that matches those sizes. This + /// templatized helper function is used when accessing address offsets in the + /// AddrOffsets member variable. + /// + /// \returns An ArrayRef of an appropriate address offset size. + template ArrayRef + getAddrOffsets() const { + return ArrayRef(reinterpret_cast(AddrOffsets.data()), + AddrOffsets.size()/sizeof(T)); + } + + /// Get an appropriate address from the address table. + /// + /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8 + /// byte address offsets from the gsym::Header::BaseAddress. 
The table is + /// stored internally as an array of bytes that are in the correct endianness. + /// In order to extract an address from the address table we must access the + /// address offset using the correct size and then add it to the BaseAddress + /// in the header. + /// + /// \param Index An index into the AddrOffsets array. + /// \returns A virtual address that matches the original object file for the + /// address at the specified index, or llvm::None if Index is out of bounds. + template Optional + addressForIndex(size_t Index) const { + ArrayRef AIO = getAddrOffsets(); + if (Index < AIO.size()) + return AIO[Index] + Hdr->BaseAddress; + return llvm::None; + } + /// Lookup an address offset in the AddrOffsets table. + /// + /// Given an address offset, look it up using a binary search of the + /// AddrOffsets table. + /// + /// \param AddrOffset An address offset, that has already been computed by + /// subtracting the gsym::Header::BaseAddress. + /// \returns The matching address offset index. This index will be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + template + uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const { + ArrayRef AIO = getAddrOffsets(); + const auto Begin = AIO.begin(); + const auto End = AIO.end(); + auto Iter = std::lower_bound(Begin, End, AddrOffset); + if (Iter == End || AddrOffset < *Iter) // No exact match: use the entry before. + --Iter; // NOTE(review): steps before Begin if the table is empty; confirm callers. + return std::distance(Begin, Iter); + } + + /// Create a GSYM from a memory buffer. + /// + /// Called by both openFile() and copyBuffer(), this function does all of the + /// work of parsing the GSYM file and returning an error. + /// + /// \param MemBuffer A memory buffer that will transfer ownership into the + /// GsymReader. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected + create(std::unique_ptr &MemBuffer); + + + /// Given an address, find the address index. 
+ /// + /// Binary search the address table and find the matching address index. + /// + /// \param Addr A virtual address that matches the original object file + /// to lookup. + /// \returns An index into the address table. This index can be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + /// Returns an error if the address isn't in the GSYM with details of why. + Expected getAddressIndex(const uint64_t Addr) const; + + /// Given an address index, get the offset for the FunctionInfo. + /// + /// Looking up an address is done by finding the corresponding address + /// index for the address. This index is then used to get the offset of the + /// FunctionInfo data that we will decode using this function. + /// + /// \param Index An index into the address table. + /// \returns An optional GSYM data offset for the offset of the FunctionInfo + /// that needs to be decoded. + Optional getAddressInfoOffset(size_t Index) const; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/Header.h b/llvm/include/llvm/DebugInfo/GSYM/Header.h --- a/llvm/include/llvm/DebugInfo/GSYM/Header.h +++ b/llvm/include/llvm/DebugInfo/GSYM/Header.h @@ -22,6 +22,7 @@ class FileWriter; constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG' constexpr uint32_t GSYM_VERSION = 1; constexpr size_t GSYM_MAX_UUID_SIZE = 20; @@ -84,16 +85,20 @@ /// be set to zero. uint8_t UUID[GSYM_MAX_UUID_SIZE]; - /// Check if a header is valid. + /// Check if a header is valid and return an error if anything is wrong. /// - /// \returns True if the header is valid and if the version is supported. 
- bool isValid() const { - if (Magic != GSYM_MAGIC) - return false; - if (Version != GSYM_VERSION) - return false; - return true; - } + /// This function can be used prior to encoding a header to ensure it is + /// valid, or after decoding a header to ensure it is valid and supported. + /// + /// Check a correctly byte swapped header for errors: + /// - check magic value + /// - check that version number is supported + /// - check that the address offset size is supported + /// - check that the UUID size is valid + /// + /// \returns An error if anything is wrong in the header, or Error::success() + /// if there are no errors. + llvm::Error checkForError() const; /// Decode an object from a binary data stream. /// diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt --- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt +++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt @@ -2,6 +2,8 @@ Header.cpp FileWriter.cpp FunctionInfo.cpp + GsymCreator.cpp + GsymReader.cpp InlineInfo.cpp LineTable.cpp Range.cpp @@ -9,4 +11,7 @@ ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/GSYM ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo + + DEPENDS + LLVMMC ) diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -1,4 +1,4 @@ -//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +//===- FunctionInfo.cpp ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,274 @@ +//===- GsymCreator.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + + +using namespace llvm; +using namespace gsym; + + +GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { + insertFile(StringRef()); +} + +uint32_t GsymCreator::insertFile(StringRef Path, + llvm::sys::path::Style Style) { + llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); + llvm::StringRef filename = llvm::sys::path::filename(Path, Style); + FileEntry FE(insertString(directory), insertString(filename)); + + std::lock_guard Guard(Mutex); + const auto NextIndex = Files.size(); + // Find FE in hash map and insert if not present. 
+ auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); + if (R.second) + Files.emplace_back(FE); + return R.first->second; +} + +llvm::Error GsymCreator::save(StringRef Path, + llvm::support::endianness ByteOrder) const { + std::error_code EC; + raw_fd_ostream OutStrm(Path, EC); + if (EC) + return llvm::errorCodeToError(EC); + FileWriter O(OutStrm, ByteOrder); + return encode(O); +} + +llvm::Error GsymCreator::encode(FileWriter &O) const { + std::lock_guard Guard(Mutex); + if (Funcs.empty()) + return createStringError(std::errc::invalid_argument, + "no functions to encode"); + if (!Finalized) + return createStringError(std::errc::invalid_argument, + "GsymCreator wasn't finalized prior to encoding"); + + if (Funcs.size() > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many FunctionInfos"); + const uint64_t MinAddr = Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + Header Hdr; + Hdr.Magic = GSYM_MAGIC; + Hdr.Version = GSYM_VERSION; + Hdr.AddrOffSize = 0; + Hdr.UUIDSize = static_cast(UUID.size()); + Hdr.BaseAddress = MinAddr; + Hdr.NumAddresses = static_cast(Funcs.size()); + Hdr.StrtabOffset = 0; // We will fix this up later. + Hdr.StrtabSize = 0; // We will fix this up later. + memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); // Portable replacement for bzero(). + if (UUID.size() > sizeof(Hdr.UUID)) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", (uint32_t)UUID.size()); + // Set the address offset size correctly in the GSYM header. + if (AddrDelta <= UINT8_MAX) + Hdr.AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + Hdr.AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + Hdr.AddrOffSize = 4; + else + Hdr.AddrOffSize = 8; + // Copy the UUID value if we have one. + if (UUID.size() > 0) + memcpy(Hdr.UUID, UUID.data(), UUID.size()); + // Write out the header. 
+ llvm::Error Err = Hdr.encode(O); + if (Err) + return Err; + + // Write out the address offsets. + O.alignTo(Hdr.AddrOffSize); + for (const auto &FuncInfo : Funcs) { + uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + switch(Hdr.AddrOffSize) { + case 1: O.writeU8(static_cast(AddrOffset)); break; + case 2: O.writeU16(static_cast(AddrOffset)); break; + case 4: O.writeU32(static_cast(AddrOffset)); break; + case 8: O.writeU64(AddrOffset); break; + } + } + + // Write out all zeros for the AddrInfoOffsets. + O.alignTo(4); + const off_t AddrInfoOffsetsOffset = O.tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + O.writeU32(0); + + // Write out the file table + O.alignTo(4); + assert(!Files.empty()); + assert(Files[0].Dir == 0); + assert(Files[0].Base == 0); + size_t NumFiles = Files.size(); + if (NumFiles > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many files"); + O.writeU32(static_cast(NumFiles)); + for (auto File: Files) { + O.writeU32(File.Dir); + O.writeU32(File.Base); + } + + // Write out the string table. + const off_t StrtabOffset = O.tell(); + StrTab.write(O.get_stream()); + const off_t StrtabSize = O.tell() - StrtabOffset; + std::vector AddrInfoOffsets; + + // Write out the address infos for each function info. 
+ for (const auto &FuncInfo : Funcs) { + if (Expected OffsetOrErr = FuncInfo.encode(O)) + AddrInfoOffsets.push_back(OffsetOrErr.get()); + else + return OffsetOrErr.takeError(); + } + // Fixup the string table offset and size in the header + O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); + O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); + + // Fixup all address info offsets + uint64_t Offset = 0; + for (auto AddrInfoOffset: AddrInfoOffsets) { + O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); + Offset += 4; + } + return Error::success(); +} + +llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { + std::lock_guard Guard(Mutex); + if (Finalized) + return createStringError(std::errc::invalid_argument, + "already finalized"); + Finalized = true; + + // Sort function infos so we can emit sorted functions. + llvm::sort(Funcs.begin(), Funcs.end()); + + // Don't let the string table indexes change by finalizing in order. + StrTab.finalizeInOrder(); + + // Remove duplicate function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping functions. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. 
+ // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + if (Prev->Range.intersects(Curr->Range)) { + // Overlapping address ranges. + if (Prev->Range == Curr->Range) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info. + // Our sorting guarantees that entries with matching address ranges + // that have debug info are last in the sort. + if (*Prev == *Curr) { + // FunctionInfo entries match exactly (range, lines, inlines) + OS << "warning: duplicate function info entries, removing " + "duplicate:\n" + << *Curr << '\n'; + Curr = Funcs.erase(Prev); + } else { + if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { + // Same address range, one with no debug info (symbol) and the + // next with debug info. Keep the latter. + Curr = Funcs.erase(Prev); + } else { + OS << "warning: same address range contains different debug " + << "info. 
Removing:\n" + << *Prev << "\nIn favor of this one:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + } else { + // print warnings about overlaps + OS << "warning: function ranges overlap:\n" + << *Prev << "\n" + << *Curr << "\n"; + } + } else if (Prev->Range.size() == 0 && + Curr->Range.contains(Prev->Range.Start)) { + OS << "warning: removing symbol:\n" + << *Prev << "\nKeeping:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + if (Curr == Funcs.end()) + break; + Prev = Curr++; + } + + OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " + << Funcs.size() << " total\n"; + return Error::success(); +} + +uint32_t GsymCreator::insertString(StringRef S) { + std::lock_guard Guard(Mutex); + if (S.empty()) + return 0; + return StrTab.add(S); +} + +void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard Guard(Mutex); + Funcs.emplace_back(std::move(FI)); // FI is an lvalue here; move, don't copy. +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) { + std::lock_guard Guard(Mutex); + for (auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) const { + std::lock_guard Guard(Mutex); + for (const auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,274 @@ +//===- GsymReader.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace gsym;
+
+/// Take ownership of \p Buffer. Endian starts out as the host byte order;
+/// parse() overwrites it once the header magic has been inspected.
+GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) :
+    MemBuffer(std::move(Buffer)),
+    Endian(support::endian::system_endianness()) {}
+
+GsymReader::GsymReader(GsymReader &&RHS) = default;
+
+GsymReader::~GsymReader() = default;
+
+/// Load \p Filename through MemoryBuffer::getFileOrSTDIN() and build a reader
+/// from its contents.
+///
+/// \returns the parsed reader, the file error converted to an llvm::Error, or
+/// whatever error create() reports.
+llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
+  // Open the input file and return an appropriate error if needed.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  auto Err = BuffOrErr.getError();
+  if (Err)
+    return llvm::errorCodeToError(Err);
+  return create(BuffOrErr.get());
+}
+
+/// Copy \p Bytes into a new memory buffer named "GSYM bytes" and build a
+/// reader from that copy.
+llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
+  auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
+  return create(MemBuffer);
+}
+
+/// Construct a reader that takes ownership of \p MemBuffer and parse it.
+///
+/// \returns an "invalid memory buffer" error when \p MemBuffer is null, the
+/// error from parse() when parsing fails, and the reader otherwise.
+llvm::Expected<GsymReader>
+GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
+  if (!MemBuffer.get())
+    return createStringError(std::errc::invalid_argument,
+                             "invalid memory buffer");
+  GsymReader GR(std::move(MemBuffer));
+  llvm::Error Err = GR.parse();
+  if (Err)
+    return std::move(Err);
+  return std::move(GR);
+}
+
+/// Parse the GSYM data held in MemBuffer.
+///
+/// Reads the header, uses its magic value to decide whether the data is in
+/// host byte order, validates the header and then sets up AddrOffsets,
+/// AddrInfoOffsets, Files and StrTab. Byte swapped data is decoded into the
+/// storage owned by Swap.
+///
+/// \returns Error::success(), or an error describing the part of the file
+/// that could not be read.
+llvm::Error
+GsymReader::parse() {
+  BinaryStreamReader FileData(MemBuffer->getBuffer(),
+                              support::endian::system_endianness());
+  // BinaryStreamReader reports failures as llvm::Error values, and an Error
+  // in the failure state must be consumed before it is destroyed. This helper
+  // consumes such a failure so it can be replaced by a GSYM specific message.
+  auto ReadFailed = [](Error E) -> bool {
+    if (!E)
+      return false;
+    consumeError(std::move(E));
+    return true;
+  };
+  // Check for the magic bytes. This file format is designed to be mmap'ed
+  // into a process and accessed as read only. This is done for performance
+  // and efficiency for symbolicating and parsing GSYM data.
+  if (ReadFailed(FileData.readObject(Hdr)))
+    return createStringError(std::errc::invalid_argument,
+                             "not enough data for a GSYM header");
+
+  const auto HostByteOrder = support::endian::system_endianness();
+  switch (Hdr->Magic) {
+    case GSYM_MAGIC:
+      Endian = HostByteOrder;
+      break;
+    case GSYM_CIGAM:
+      // This is a GSYM file, but not native endianness.
+      Endian = sys::IsBigEndianHost ? support::little : support::big;
+      Swap.reset(new SwappedData);
+      break;
+    default:
+      return createStringError(std::errc::invalid_argument,
+                               "not a GSYM file");
+  }
+
+  // Only used when Swap is set, i.e. when the data has the opposite byte
+  // order of the host.
+  bool DataIsLittleEndian = HostByteOrder != support::little;
+  // Read a correctly byte swapped header if we need to.
+  if (Swap) {
+    DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+    if (auto ExpectedHdr = Header::decode(Data))
+      Swap->Hdr = ExpectedHdr.get();
+    else
+      return ExpectedHdr.takeError();
+    Hdr = &Swap->Hdr;
+  }
+
+  // Detect errors in the header and report any that are found. If we make it
+  // past this without errors, we know we have a good magic value, a supported
+  // version number, verified address offset size and a valid UUID size.
+  if (Error Err = Hdr->checkForError())
+    return Err;
+
+  if (!Swap) {
+    // This is the native endianness case that is most common and optimized for
+    // efficient lookups. Here we just grab pointers to the native data and
+    // use ArrayRef objects to allow efficient read only access.
+
+    // Read the address offsets.
+    if (ReadFailed(FileData.padToAlignment(Hdr->AddrOffSize)) ||
+        ReadFailed(FileData.readArray(AddrOffsets,
+                                      Hdr->NumAddresses * Hdr->AddrOffSize)))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address table");
+
+    // Read the address info offsets.
+    if (ReadFailed(FileData.padToAlignment(4)) ||
+        ReadFailed(FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address info offsets table");
+
+    // Read the file table.
+    uint32_t NumFiles = 0;
+    if (ReadFailed(FileData.readInteger(NumFiles)) ||
+        ReadFailed(FileData.readArray(Files, NumFiles)))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read file table");
+
+    // Get the string table.
+    FileData.setOffset(Hdr->StrtabOffset);
+    if (ReadFailed(FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read string table");
+  } else {
+    // This is the non native endianness case that is not common and not
+    // optimized for lookups. Here we decode the important tables into local
+    // storage and then set the ArrayRef objects to point to these swapped
+    // copies of the read only data so lookups can be as efficient as possible.
+    DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+
+    // Read the address offsets.
+    uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
+    Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
+    switch (Hdr->AddrOffSize) {
+      case 1:
+        if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
+          return createStringError(std::errc::invalid_argument,
+                                   "failed to read address table");
+        break;
+      case 2:
+        if (!Data.getU16(&Offset,
+                         reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
+                         Hdr->NumAddresses))
+          return createStringError(std::errc::invalid_argument,
+                                   "failed to read address table");
+        break;
+      case 4:
+        if (!Data.getU32(&Offset,
+                         reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
+                         Hdr->NumAddresses))
+          return createStringError(std::errc::invalid_argument,
+                                   "failed to read address table");
+        break;
+      case 8:
+        if (!Data.getU64(&Offset,
+                         reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
+                         Hdr->NumAddresses))
+          return createStringError(std::errc::invalid_argument,
+                                   "failed to read address table");
+        break;
+    }
+    AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
+
+    // Read the address info offsets.
+    Offset = alignTo(Offset, 4);
+    Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
+    if (!Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address info offsets table");
+    AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
+    // Read the file table. Each entry is decoded as two 32 bit values starting
+    // at the Dir member of the first entry.
+    const uint32_t NumFiles = Data.getU32(&Offset);
+    if (NumFiles > 0) {
+      Swap->Files.resize(NumFiles);
+      if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
+        Files = ArrayRef<FileEntry>(Swap->Files);
+      else
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read file table");
+    }
+    // Get the string table.
+    StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
+                                                Hdr->StrtabSize);
+    if (StrTab.Data.empty())
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read string table");
+  }
+  return Error::success();
+}
+
+const Header &GsymReader::getHeader() const {
+  // The only way to get a GsymReader is from GsymReader::openFile(...) or
+  // GsymReader::copyBuffer() and the header must be valid and initialized to
+  // a valid pointer value, so the assert below should not trigger.
+  assert(Hdr);
+  return *Hdr;
+}
+
+/// Look up the address for \p Index by dispatching to addressForIndex<T>()
+/// with T sized to match Hdr->AddrOffSize.
+///
+/// \returns llvm::None when AddrOffSize is not 1, 2, 4 or 8; otherwise
+/// whatever addressForIndex<T>() returns.
+Optional<uint64_t> GsymReader::getAddress(size_t Index) const {
+  switch (Hdr->AddrOffSize) {
+  case 1: return addressForIndex<uint8_t>(Index);
+  case 2: return addressForIndex<uint16_t>(Index);
+  case 4: return addressForIndex<uint32_t>(Index);
+  case 8: return addressForIndex<uint64_t>(Index);
+  }
+  return llvm::None;
+}
+
+/// \returns AddrInfoOffsets[Index], or llvm::None when \p Index is out of
+/// bounds.
+Optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
+  const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
+  if (Index < NumAddrInfoOffsets)
+    return AddrInfoOffsets[Index];
+  return llvm::None;
+}
+
+/// Find the address table index for \p Addr.
+///
+/// \p Addr is converted to an offset from Hdr->BaseAddress and handed to
+/// getAddressOffsetIndex<T>() with T sized to match Hdr->AddrOffSize.
+///
+/// \returns an error when \p Addr is below the base address or when
+/// AddrOffSize is not 1, 2, 4 or 8.
+Expected<uint64_t>
+GsymReader::getAddressIndex(const uint64_t Addr) const {
+  if (Addr < Hdr->BaseAddress)
+    return createStringError(std::errc::invalid_argument,
+                             "address 0x%" PRIx64 " not in GSYM", Addr);
+  const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
+  switch (Hdr->AddrOffSize) {
+  case 1: return getAddressOffsetIndex<uint8_t>(AddrOffset);
+  case 2: return getAddressOffsetIndex<uint16_t>(AddrOffset);
+  case 4: return getAddressOffsetIndex<uint32_t>(AddrOffset);
+  case 8: return getAddressOffsetIndex<uint64_t>(AddrOffset);
+  default: break;
+  }
+  return createStringError(std::errc::invalid_argument,
+                           "unsupported address offset size %u",
+                           Hdr->AddrOffSize);
+}
+
+/// Decode the FunctionInfo whose address table entry matches \p Addr.
+///
+/// \returns the decoded FunctionInfo when its range contains \p Addr or has
+/// size zero. Otherwise returns an error: the lookup error from
+/// getAddressIndex(), the decoding error from FunctionInfo::decode(), or a
+/// string error when the address cannot be extracted or lies outside of the
+/// decoded range.
+llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
+  Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
+  if (!AddressIndex)
+    return AddressIndex.takeError();
+  // Address info offsets size should have been checked in parse().
+  assert(*AddressIndex < AddrInfoOffsets.size());
+  auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
+  // DataExtractor takes an "is little endian" flag, so compare explicitly
+  // instead of relying on the enumerator value converting to bool.
+  DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset),
+                     Endian == support::little, 4);
+  Optional<uint64_t> OptAddr = getAddress(*AddressIndex);
+  if (!OptAddr)
+    return createStringError(std::errc::invalid_argument,
+                             "failed to extract address[%" PRIu64 "]",
+                             *AddressIndex);
+  auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr);
+  // A failed Expected must have its error taken before it is destroyed, so
+  // hand the decoding error to the caller instead of dropping it.
+  if (!ExpectedFI)
+    return ExpectedFI.takeError();
+  if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0)
+    return ExpectedFI;
+  return createStringError(std::errc::invalid_argument,
+                           "address 0x%" PRIx64 " not in GSYM", Addr);
+}
diff --git a/llvm/lib/DebugInfo/GSYM/Header.cpp b/llvm/lib/DebugInfo/GSYM/Header.cpp --- a/llvm/lib/DebugInfo/GSYM/Header.cpp +++ b/llvm/lib/DebugInfo/GSYM/Header.cpp @@ -38,14 +38,14 @@ } /// Check the header and detect any errors. -static llvm::Error getHeaderError(const Header &H) { - if (H.Magic != GSYM_MAGIC) +llvm::Error Header::checkForError() const { + if (Magic != GSYM_MAGIC) return createStringError(std::errc::invalid_argument, - "invalid GSYM magic 0x%8.8x", H.Magic); - if (H.Version != GSYM_VERSION) + "invalid GSYM magic 0x%8.8x", Magic); + if (Version != GSYM_VERSION) return createStringError(std::errc::invalid_argument, - "unsupported GSYM version %u", H.Version); - switch (H.AddrOffSize) { + "unsupported GSYM version %u", Version); + switch (AddrOffSize) { case 1: break; case 2: break; case 4: break; @@ -53,11 +53,11 @@ default: return createStringError(std::errc::invalid_argument, "invalid address offset size %u", - H.AddrOffSize); + AddrOffSize); } - if (H.UUIDSize > GSYM_MAX_UUID_SIZE) + if (UUIDSize > GSYM_MAX_UUID_SIZE) return createStringError(std::errc::invalid_argument, - "invalid UUID size %u", H.UUIDSize); + "invalid UUID size %u", UUIDSize); return Error::success(); } @@ -77,16 +77,14 @@ H.StrtabOffset = Data.getU32(&Offset); H.StrtabSize = Data.getU32(&Offset); Data.getU8(&Offset, H.UUID, 
GSYM_MAX_UUID_SIZE); - llvm::Error Err = getHeaderError(H); - if (Err) + if (llvm::Error Err = H.checkForError()) return std::move(Err); return H; } llvm::Error Header::encode(FileWriter &O) const { // Users must verify the Header is valid prior to calling this funtion. - llvm::Error Err = getHeaderError(*this); - if (Err) + if (llvm::Error Err = checkForError()) return Err; O.writeU32(Magic); O.writeU16(Version); diff --git a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt --- a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt +++ b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_LINK_COMPONENTS DebugInfoGSYM + MC Support ) diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -13,6 +13,8 @@ #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" @@ -1046,3 +1048,255 @@ TestHeaderEncodeDecode(H, llvm::support::little); TestHeaderEncodeDecode(H, llvm::support::big); }
+
+/// Encode \p GC with the given byte order and verify that encoding fails with
+/// exactly \p ExpectedErrorMsg.
+static void TestGsymCreatorEncodeError(llvm::support::endianness ByteOrder,
+                                       const GsymCreator &GC,
+                                       std::string ExpectedErrorMsg) {
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  llvm::Error Err = GC.encode(FW);
+  ASSERT_TRUE(bool(Err));
+  checkError(ExpectedErrorMsg, std::move(Err));
+}
+
+TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
+  const uint8_t ValidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+                               14, 15, 16};
+  const uint8_t InvalidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+                                 14, 15, 16, 17, 18, 19, 20, 21};
+  // Verify we get an error when trying to encode a GsymCreator with no
+  // function infos. We shouldn't be saving a GSYM file in this case since
+  // there is nothing inside of it.
+  GsymCreator GC;
+  TestGsymCreatorEncodeError(llvm::support::little, GC,
+                             "no functions to encode");
+  const uint64_t FuncAddr = 0x1000;
+  const uint64_t FuncSize = 0x100;
+  const uint32_t FuncName = GC.insertString("foo");
+  // Verify we get an error trying to encode a GsymCreator that isn't
+  // finalized.
+  GC.addFunctionInfo(FunctionInfo(FuncAddr, FuncSize, FuncName));
+  TestGsymCreatorEncodeError(llvm::support::little, GC,
+                             "GsymCreator wasn't finalized prior to encoding");
+  std::string finalizeIssues;
+  raw_string_ostream OS(finalizeIssues);
+  llvm::Error finalizeErr = GC.finalize(OS);
+  ASSERT_FALSE(bool(finalizeErr));
+  // Finalizing a second time must be rejected.
+  finalizeErr = GC.finalize(OS);
+  ASSERT_TRUE(bool(finalizeErr));
+  checkError("already finalized", std::move(finalizeErr));
+  // Verify we get an error trying to encode a GsymCreator with a UUID that is
+  // too long.
+  GC.setUUID(InvalidUUID);
+  TestGsymCreatorEncodeError(llvm::support::little, GC,
+                             "invalid UUID size 21");
+  GC.setUUID(ValidUUID);
+  // Verify errors are propagated when we try to encode an invalid line
+  // table.
+  GC.forEachFunctionInfo([](FunctionInfo &FI) -> bool {
+    FI.OptLineTable = LineTable(); // Invalid line table.
+    return false; // Stop iterating
+  });
+  TestGsymCreatorEncodeError(llvm::support::little, GC,
+                             "attempted to encode invalid LineTable object");
+  // Verify errors are propagated when we try to encode an invalid inline
+  // info.
+  GC.forEachFunctionInfo([](FunctionInfo &FI) -> bool {
+    FI.OptLineTable = llvm::None;
+    FI.Inline = InlineInfo(); // Invalid InlineInfo.
+    return false; // Stop iterating
+  });
+  TestGsymCreatorEncodeError(llvm::support::little, GC,
+                             "attempted to encode invalid InlineInfo object");
+}
+
+static void Compare(const GsymCreator &GC, const GsymReader &GR) {
+  // Verify that all of the data in a GsymCreator is correctly decoded from
+  // a GsymReader. To do this, we iterate over every FunctionInfo in the
+  // creator and look it up in the reader by its start address.
+  GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool {
+    auto DecodedFI = GR.getFunctionInfo(FI.Range.Start);
+    if (!DecodedFI) {
+      // Report the lookup failure instead of dereferencing a failed Expected.
+      // Taking the error also satisfies the requirement that it be handled.
+      ADD_FAILURE() << "failed to decode FunctionInfo: "
+                    << llvm::toString(DecodedFI.takeError());
+      return true; // Keep checking the remaining FunctionInfo objects.
+    }
+    EXPECT_EQ(FI, *DecodedFI);
+    return true; // Keep iterating over all FunctionInfo objects.
+  });
+}
+
+/// Encode \p GC with \p ByteOrder, read the bytes back with a GsymReader and
+/// verify the decoded header fields and every FunctionInfo.
+static void TestEncodeDecode(const GsymCreator &GC,
+                             support::endianness ByteOrder, uint16_t Version,
+                             uint8_t AddrOffSize, uint64_t BaseAddress,
+                             uint32_t NumAddresses, ArrayRef<uint8_t> UUID) {
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  llvm::Error Err = GC.encode(FW);
+  ASSERT_FALSE(bool(Err));
+  Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+  // Take the error on failure so the reason shows up in the test output.
+  if (!GR)
+    FAIL() << "failed to create GsymReader: "
+           << llvm::toString(GR.takeError());
+  const Header &Hdr = GR->getHeader();
+  EXPECT_EQ(Hdr.Version, Version);
+  EXPECT_EQ(Hdr.AddrOffSize, AddrOffSize);
+  EXPECT_EQ(Hdr.UUIDSize, UUID.size());
+  EXPECT_EQ(Hdr.BaseAddress, BaseAddress);
+  EXPECT_EQ(Hdr.NumAddresses, NumAddresses);
+  EXPECT_EQ(ArrayRef<uint8_t>(Hdr.UUID, Hdr.UUIDSize), UUID);
+  Compare(GC, GR.get());
+}
+
+TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) {
+  uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  GsymCreator GC;
+  GC.setUUID(UUID);
+  constexpr uint64_t BaseAddr = 0x1000;
+  constexpr uint8_t AddrOffSize = 1;
+  const uint32_t Func1Name = GC.insertString("foo");
+  const uint32_t Func2Name = GC.insertString("bar");
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x00, 0x10, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x20, 0x10, Func2Name));
+  Error Err = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(Err);
+  TestEncodeDecode(GC, llvm::support::little,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, 
// NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+  TestEncodeDecode(GC, llvm::support::big,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, // NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+}
+
+// The second function is placed 0x200 past the base address, which is the
+// largest address offset in this test.
+TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) {
+  uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  GsymCreator GC;
+  GC.setUUID(UUID);
+  constexpr uint64_t BaseAddr = 0x1000;
+  constexpr uint8_t AddrOffSize = 2;
+  const uint32_t Func1Name = GC.insertString("foo");
+  const uint32_t Func2Name = GC.insertString("bar");
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x200, 0x100, Func2Name));
+  Error Err = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(Err);
+  TestEncodeDecode(GC, llvm::support::little,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, // NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+  TestEncodeDecode(GC, llvm::support::big,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, // NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+}
+
+// The second function is placed 0x20000 past the base address, which is the
+// largest address offset in this test.
+TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) {
+  uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  GsymCreator GC;
+  GC.setUUID(UUID);
+  constexpr uint64_t BaseAddr = 0x1000;
+  constexpr uint8_t AddrOffSize = 4;
+  const uint32_t Func1Name = GC.insertString("foo");
+  const uint32_t Func2Name = GC.insertString("bar");
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x20000, 0x100, Func2Name));
+  Error Err = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(Err);
+  TestEncodeDecode(GC, llvm::support::little,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, // NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+  TestEncodeDecode(GC, llvm::support::big,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, // NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+}
+
+TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) {
+  uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  GsymCreator GC;
+  GC.setUUID(UUID);
+  constexpr uint64_t BaseAddr = 0x1000;
+  constexpr uint8_t AddrOffSize = 8;
+  const uint32_t Func1Name = GC.insertString("foo");
+  const uint32_t Func2Name = GC.insertString("bar");
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(BaseAddr+0x100000000, 0x100, Func2Name));
+  Error Err = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(Err);
+  TestEncodeDecode(GC, llvm::support::little,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, // NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+  TestEncodeDecode(GC, llvm::support::big,
+                   GSYM_VERSION,
+                   AddrOffSize,
+                   BaseAddr,
+                   2, // NumAddresses
+                   ArrayRef<uint8_t>(UUID));
+}
+
+/// Verify that looking up \p Addr in \p GR succeeds and yields \p FI.
+static void VerifyFunctionInfo(const GsymReader &GR, uint64_t Addr,
+                               const FunctionInfo &FI) {
+  auto ExpFI = GR.getFunctionInfo(Addr);
+  // Take the error on failure so the reason shows up in the test output.
+  if (!ExpFI)
+    FAIL() << "lookup failed: " << llvm::toString(ExpFI.takeError());
+  ASSERT_EQ(FI, ExpFI.get());
+}
+
+/// Verify that looking up \p Addr in \p GR fails with exactly \p ErrMessage.
+static void VerifyFunctionInfoError(const GsymReader &GR, uint64_t Addr,
+                                    std::string ErrMessage) {
+  auto ExpFI = GR.getFunctionInfo(Addr);
+  ASSERT_FALSE(bool(ExpFI));
+  checkError(ErrMessage, ExpFI.takeError());
+}
+
+TEST(GSYMTest, TestGsymReader) {
+  uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  GsymCreator GC;
+  GC.setUUID(UUID);
+  constexpr uint64_t BaseAddr = 0x1000;
+  constexpr uint64_t Func1Addr = BaseAddr;
+  constexpr uint64_t Func2Addr = BaseAddr+0x20;
+  constexpr uint64_t FuncSize = 0x10;
+  const uint32_t Func1Name = GC.insertString("foo");
+  const uint32_t Func2Name = GC.insertString("bar");
+  const auto ByteOrder = support::endian::system_endianness();
+  GC.addFunctionInfo(FunctionInfo(Func1Addr, FuncSize, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(Func2Addr, FuncSize, Func2Name));
+  Error FinalizeErr = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(FinalizeErr);
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  llvm::Error Err = GC.encode(FW);
+  ASSERT_FALSE(bool(Err));
+  // Fail the test when the reader cannot be created; skipping the lookups
+  // below would let the test pass without checking anything.
+  auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str());
+  if (!ExpectedGR)
+    FAIL() << "failed to create GsymReader: "
+           << llvm::toString(ExpectedGR.takeError());
+  const GsymReader &GR = ExpectedGR.get();
+  VerifyFunctionInfoError(GR, Func1Addr-1, "address 0xfff not in GSYM");
+
+  FunctionInfo Func1(Func1Addr, FuncSize, Func1Name);
+  VerifyFunctionInfo(GR, Func1Addr, Func1);
+  VerifyFunctionInfo(GR, Func1Addr+1, Func1);
+  VerifyFunctionInfo(GR, Func1Addr+FuncSize-1, Func1);
+  VerifyFunctionInfoError(GR, Func1Addr+FuncSize,
+                          "address 0x1010 not in GSYM");
+  VerifyFunctionInfoError(GR, Func2Addr-1, "address 0x101f not in GSYM");
+  FunctionInfo Func2(Func2Addr, FuncSize, Func2Name);
+  VerifyFunctionInfo(GR, Func2Addr, Func2);
+  VerifyFunctionInfo(GR, Func2Addr+1, Func2);
+  VerifyFunctionInfo(GR, Func2Addr+FuncSize-1, Func2);
+  VerifyFunctionInfoError(GR, Func2Addr+FuncSize,
+                          "address 0x1030 not in GSYM");
+}