Index: include/llvm/DebugInfo/GSYM/Header.h =================================================================== --- include/llvm/DebugInfo/GSYM/Header.h +++ include/llvm/DebugInfo/GSYM/Header.h @@ -0,0 +1,124 @@ +//===- Header.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_HEADER_H +#define LLVM_DEBUGINFO_GSYM_HEADER_H + +#include "llvm/Support/Error.h" + +#include +#include + +namespace llvm { +class raw_ostream; +class DataExtractor; + +namespace gsym { +class FileWriter; + +constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_VERSION = 1; +constexpr size_t GSYM_MAX_UUID_SIZE = 20; + +/// The GSYM header. +/// +/// The GSYM header is found at the start of a stand alone GSYM file, or as +/// the first bytes in a section when GSYM is contained in a section of an +/// executable file (ELF, mach-o, COFF). +/// +/// The structure is encoded exactly as it appears in the structure definition +/// with no gaps between members. Alignment should not change from system to +/// system as the members were laid out so that they shouldn't align +/// differently on different architectures. +/// +/// When endianness of the system loading a GSYM file matches, the file can +/// be mmap'ed in and a pointer to the header can be cast to the first bytes +/// of the file (stand alone GSYM file) or section data (GSYM in a section). +/// When endianness is swapped, the Header::decode() function should be used to +/// decode the header. +struct Header { + /// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file + /// is a GSYM file by scanning the first 4 bytes of a file or section. + /// This value might appear byte swapped + uint32_t Magic; + /// The version can number determines how the header is decoded and how each + /// InfoType in FunctionInfo is encoded/decoded. As version numbers increase, + /// "Magic" and "Version" members should always appear at offset zero and 4 + /// respectively to ensure clients figure out if they can parse the format. + uint16_t Version; + /// The size in bytes of each address offset in the address offsets table. + uint8_t AddrOffSize; + /// The size in bytes of the UUID encoded in the "UUID" member. + uint8_t UUIDSize; + /// The 64 bit base address that all address offsets in the address offsets + /// table are relative to. Storing a full 64 bit address allows our address + /// offsets table to be smaller on disk. + uint64_t BaseAddress; + /// The number of addresses stored in the address offsets table. + uint32_t NumAddresses; + /// The file relative offset of the start of the string table for strings + /// contained in the GSYM file. If the GSYM in contained in a stand alone + /// file this will be the file offset of the start of the string table. If + /// the GSYM is contained in a section within an executable file, this can + /// be the offset of the first string used in the GSYM file and can possibly + /// span one or more executable string tables. This allows the strings to + /// share string tables in an ELF or mach-o file. + uint32_t StrtabOffset; + /// The size in bytes of the string table. For a stand alone GSYM file, this + /// will be the exact size in bytes of the string table. When the GSYM data + /// is in a section within an executable file, this size can span one or more + /// sections that contains strings. This allows any strings that are already + /// stored in the executable file to be re-used, and any extra strings could + /// be added to another string table and the string table offset and size + /// can be set to span all needed string tables. + uint32_t StrtabSize; + /// The UUID of the original executable file. This is stored to allow + /// matching a GSYM file to an executable file when symbolication is + /// required. Only the first "UUIDSize" bytes of the UUID are valid. Any + /// bytes in the UUID value that appear after the first UUIDSize bytes should + /// be set to zero. + uint8_t UUID[GSYM_MAX_UUID_SIZE]; + + /// Check if a header is valid. + /// + /// \returns True if the header is valid and if the version is supported. + bool isValid() const { + if (Magic != GSYM_MAGIC) + return false; + if (Version != GSYM_VERSION) + return false; + return true; + } + + /// Decode an object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \returns A Header or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected
decode(DataExtractor &Data); + + /// Encode this object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \returns An error object that indicates success or failure of the + /// encoding process. + llvm::Error encode(FileWriter &O) const; +}; + +bool operator==(const Header &LHS, const Header &RHS); +raw_ostream &operator<<(raw_ostream &OS, const llvm::gsym::Header &H); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_HEADER_H Index: lib/DebugInfo/GSYM/CMakeLists.txt =================================================================== --- lib/DebugInfo/GSYM/CMakeLists.txt +++ lib/DebugInfo/GSYM/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMDebugInfoGSYM + Header.cpp FileWriter.cpp FunctionInfo.cpp InlineInfo.cpp Index: lib/DebugInfo/GSYM/Header.cpp =================================================================== --- lib/DebugInfo/GSYM/Header.cpp +++ lib/DebugInfo/GSYM/Header.cpp @@ -0,0 +1,111 @@ +//===- Header.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +#define HEX8(v) llvm::format_hex(v, 4) +#define HEX16(v) llvm::format_hex(v, 6) +#define HEX32(v) llvm::format_hex(v, 10) +#define HEX64(v) llvm::format_hex(v, 18) + +using namespace llvm; +using namespace gsym; + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const Header &H) { + OS << "Header:\n"; + OS << " Magic = " << HEX32(H.Magic) << "\n"; + OS << " Version = " << HEX16(H.Version) << '\n'; + OS << " AddrOffSize = " << HEX8(H.AddrOffSize) << '\n'; + OS << " UUIDSize = " << HEX8(H.UUIDSize) << '\n'; + OS << " BaseAddress = " << HEX64(H.BaseAddress) << '\n'; + OS << " NumAddresses = " << HEX32(H.NumAddresses) << '\n'; + OS << " StrtabOffset = " << HEX32(H.StrtabOffset) << '\n'; + OS << " StrtabSize = " << HEX32(H.StrtabSize) << '\n'; + OS << " UUID = "; + for (uint8_t I = 0; I < H.UUIDSize; ++I) + OS << format_hex_no_prefix(H.UUID[I], 2); + OS << '\n'; + return OS; +} + +/// Check the header and detect any errors. +static llvm::Error getHeaderError(const Header &H) { + if (H.Magic != GSYM_MAGIC) + return createStringError(std::errc::invalid_argument, + "invalid GSYM magic 0x%8.8x", H.Magic); + if (H.Version != GSYM_VERSION) + return createStringError(std::errc::invalid_argument, + "unsupported GSYM version %u", H.Version); + switch (H.AddrOffSize) { + case 1: break; + case 2: break; + case 4: break; + case 8: break; + default: + return createStringError(std::errc::invalid_argument, + "invalid address offset size %u", + H.AddrOffSize); + } + if (H.UUIDSize > GSYM_MAX_UUID_SIZE) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", H.UUIDSize); + return Error::success(); +} + +llvm::Expected
Header::decode(DataExtractor &Data) { + uint64_t Offset = 0; + // The header is stored as a single blob of data that has a fixed byte size. + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Header))) + return createStringError(std::errc::invalid_argument, + "not enough data for a gsym::Header"); + Header H; + H.Magic = Data.getU32(&Offset); + H.Version = Data.getU16(&Offset); + H.AddrOffSize = Data.getU8(&Offset); + H.UUIDSize = Data.getU8(&Offset); + H.BaseAddress = Data.getU64(&Offset); + H.NumAddresses = Data.getU32(&Offset); + H.StrtabOffset = Data.getU32(&Offset); + H.StrtabSize = Data.getU32(&Offset); + Data.getU8(&Offset, H.UUID, GSYM_MAX_UUID_SIZE); + llvm::Error Err = getHeaderError(H); + if (Err) + return std::move(Err); + return H; +} + +llvm::Error Header::encode(FileWriter &O) const { + // Users must verify the Header is valid prior to calling this funtion. + llvm::Error Err = getHeaderError(*this); + if (Err) + return Err; + O.writeU32(Magic); + O.writeU16(Version); + O.writeU8(AddrOffSize); + O.writeU8(UUIDSize); + O.writeU64(BaseAddress); + O.writeU32(NumAddresses); + O.writeU32(StrtabOffset); + O.writeU32(StrtabSize); + O.writeData(llvm::ArrayRef(UUID)); + return Error::success(); +} + +bool llvm::gsym::operator==(const Header &LHS, const Header &RHS) { + return LHS.Magic == RHS.Magic && LHS.Version == RHS.Version && + LHS.AddrOffSize == RHS.AddrOffSize && LHS.UUIDSize == RHS.UUIDSize && + LHS.BaseAddress == RHS.BaseAddress && + LHS.NumAddresses == RHS.NumAddresses && + LHS.StrtabOffset == RHS.StrtabOffset && + LHS.StrtabSize == RHS.StrtabSize && + memcmp(LHS.UUID, RHS.UUID, LHS.UUIDSize) == 0; +} Index: unittests/DebugInfo/GSYM/GSYMTest.cpp =================================================================== --- unittests/DebugInfo/GSYM/GSYMTest.cpp +++ unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -9,6 +9,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/GSYM/Header.h" #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" @@ -944,3 +945,104 @@ LT.encode(FW, BaseAddr)); LT.clear(); } + +static void TestHeaderEncodeError(const Header &H, + std::string ExpectedErrorMsg) { + const support::endianness ByteOrder = llvm::support::little; + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = H.encode(FW); + checkError(ExpectedErrorMsg, std::move(Err)); +} + +static void TestHeaderDecodeError(std::string Bytes, + std::string ExpectedErrorMsg) { + const support::endianness ByteOrder = llvm::support::little; + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + llvm::Expected
Decoded = Header::decode(Data); + // Make sure decoding fails. + ASSERT_FALSE((bool)Decoded); + // Make sure decoded object is the same as the one we encoded. + checkError(ExpectedErrorMsg, Decoded.takeError()); +} + +// Populate a GSYM header with valid values. +static void InitHeader(Header &H) { + H.Magic = GSYM_MAGIC; + H.Version = GSYM_VERSION; + H.AddrOffSize = 4; + H.UUIDSize = 16; + H.BaseAddress = 0x1000; + H.NumAddresses = 1; + H.StrtabOffset= 0x2000; + H.StrtabSize = 0x1000; + for (size_t i=0; i Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + Header H; + InitHeader(H); + llvm::Error Err = H.encode(FW); + ASSERT_FALSE(Err); + FW.fixup32(12, offsetof(Header, Magic)); + TestHeaderDecodeError(OutStrm.str(), "invalid GSYM magic 0x0000000c"); + FW.fixup32(GSYM_MAGIC, offsetof(Header, Magic)); + FW.fixup32(12, offsetof(Header, Version)); + TestHeaderDecodeError(OutStrm.str(), "unsupported GSYM version 12"); + FW.fixup32(GSYM_VERSION, offsetof(Header, Version)); + FW.fixup32(12, offsetof(Header, AddrOffSize)); + TestHeaderDecodeError(OutStrm.str(), "invalid address offset size 12"); + FW.fixup32(4, offsetof(Header, AddrOffSize)); + FW.fixup32(128, offsetof(Header, UUIDSize)); + TestHeaderDecodeError(OutStrm.str(), "invalid UUID size 128"); +} + +static void TestHeaderEncodeDecode(const Header &H, + support::endianness ByteOrder) { + uint8_t AddressSize = 4; + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = H.encode(FW); + ASSERT_FALSE(Err); + std::string Bytes(OutStrm.str()); + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + llvm::Expected
Decoded = Header::decode(Data); + // Make sure decoding succeeded. + ASSERT_TRUE((bool)Decoded); + EXPECT_EQ(H, Decoded.get()); + +} +TEST(GSYMTest, TestHeaderEncodeDecode) { + Header H; + InitHeader(H); + TestHeaderEncodeDecode(H, llvm::support::little); + TestHeaderEncodeDecode(H, llvm::support::big); +}