Index: include/llvm/DebugInfo/GSYM/FunctionInfo.h =================================================================== --- include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -21,15 +21,68 @@ class raw_ostream; namespace gsym { -/// Function information in GSYM files encodes information for one -/// contiguous address range. The name of the function is encoded as -/// a string table offset and allows multiple functions with the same -/// name to share the name string in the string table. Line tables are -/// stored in a sorted vector of gsym::LineEntry objects and are split -/// into line tables for each function. If a function has a discontiguous -/// range, it will be split into two gsym::FunctionInfo objects. If the -/// function has inline functions, the information will be encoded in -/// the "Inline" member, see gsym::InlineInfo for more information. +/// Function information in GSYM files encodes information for one contiguous +/// address range. If a function has discontiguous address ranges, they will +/// need to be encoded using multiple FunctionInfo objects. +/// +/// ENCODING +/// +/// The function information gets the function start address as an argument +/// to the FunctionInfo::decode(...) function. This information is calculated +/// from the GSYM header and an address offset from the GSYM address offsets +/// table. The encoded FunctionInfo information must be aligned to a 4 byte +/// boundary. +/// +/// The encoded data for a FunctionInfo starts with fixed data that all +/// function info objects have: +/// +/// ENCODING NAME DESCRIPTION +/// ========= =========== ==================================================== +/// uint32_t Size The size in bytes of this function. +/// uint32_t Name The string table offset of the function name. 
+/// +/// The optional data in a FunctionInfo object follows this fixed information +/// and consists of a stream of tuples that consist of: +/// +/// ENCODING NAME DESCRIPTION +/// ========= =========== ==================================================== +/// uint32_t InfoType An "InfoType" enumeration that describes the type +/// of optional data that is encoded. +/// uint32_t InfoLength The size in bytes of the encoded data that +/// immediately follows this length if this value is +/// greater than zero. +/// uint8_t[] InfoData Encoded bytes that represent the data for the +/// "InfoType". These bytes are only present if +/// "InfoLength" is greater than zero. +/// +/// The "InfoType" is an enumeration: +/// +/// enum InfoType { +/// EndOfList = 0u, +/// LineTableInfo = 1u, +/// InlineInfo = 2u +/// }; +/// +/// This stream of tuples is terminated by an "InfoType" whose value is +/// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of +/// the optional information list. This format allows us to add new optional +/// information data to a FunctionInfo object over time and allows older +/// clients to still parse the format and skip over any data that they don't +/// understand or want to parse. +/// +/// So the function information encoding essentially looks like: +/// +/// struct { +/// uint32_t Size; +/// uint32_t Name; +/// struct { +/// uint32_t InfoType; +/// uint32_t InfoLength; +/// uint8_t InfoData[InfoLength]; +/// }[N]; +/// } +/// +/// Where "N" is the number of tuples. struct FunctionInfo { AddressRange Range; uint32_t Name; ///< String table offset in the string table. @@ -39,23 +92,54 @@ FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0) : Range(Addr, Addr + Size), Name(N) {} + /// Query if a FunctionInfo has rich debug info. + /// + /// \returns A bool that indicates if this object has something other than + /// range and name. 
When converting information from a symbol table and from + /// debug info, we might end up with multiple FunctionInfo objects for the + /// same range and we need to be able to tell which one is the better object + /// to use. bool hasRichInfo() const { - /// Returns whether we have something else than range and name. When - /// converting information from a symbol table and from debug info, we - /// might end up with multiple FunctionInfo objects for the same range - /// and we need to be able to tell which one is the better object to use. return OptLineTable.hasValue() || Inline.hasValue(); } + /// Query if a FunctionInfo object is valid. + /// + /// Address and size can be zero and there can be no line entries for a + /// symbol so the only indication this entry is valid is if the name is + /// not zero. This can happen when extracting information from symbol + /// tables that do not encode symbol sizes. In that case only the + /// address and name will be filled in. + /// + /// \returns A boolean indicating if this FunctionInfo is valid. bool isValid() const { - /// Address and size can be zero and there can be no line entries for a - /// symbol so the only indication this entry is valid is if the name is - /// not zero. This can happen when extracting information from symbol - /// tables that do not encode symbol sizes. In that case only the - /// address and name will be filled in. return Name != 0; } + /// Decode an object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The FunctionInfo's start address and will be used as the + /// base address when decoding any contained information like the line table + /// and the inline info. + /// + /// \returns A FunctionInfo or an error describing the issue that was + /// encountered during decoding. 
+ static llvm::Expected<FunctionInfo> decode(DataExtractor &Data, + uint64_t BaseAddr); + + /// Encode this object into a FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \returns An error object that indicates failure or the offset of the + /// function info that was successfully written into the stream. + llvm::Expected<uint64_t> encode(FileWriter &O) const; + uint64_t startAddress() const { return Range.Start; } uint64_t endAddress() const { return Range.End; } uint64_t size() const { return Range.size(); } Index: lib/DebugInfo/GSYM/FunctionInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/FunctionInfo.cpp +++ lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -1,19 +1,147 @@ //===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" using namespace llvm; using namespace gsym; +/// FunctionInfo information type that is used to encode the optional data +/// that is associated with a FunctionInfo object. 
+enum InfoType { + EndOfList = 0u, + LineTableInfo = 1u, + InlineInfo = 2u +}; + raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End) << "): " << "Name=" << HEX32(FI.Name) << '\n' << FI.OptLineTable << FI.Inline; return OS; } + +llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + FunctionInfo FI; + FI.Range.Start = BaseAddr; + uint64_t Offset = 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset); + FI.Range.End = FI.Range.Start + Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset); + FI.Name = Data.getU32(&Offset); + if (FI.Name == 0) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x", + Offset - 4, FI.Name); + bool Done = false; + while (!Done) { + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Offset); + const uint32_t IT = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Offset); + const uint32_t InfoLength = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, InfoLength)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u", + Offset, IT); + DataExtractor InfoData(Data.getData().substr(Offset, InfoLength), + Data.isLittleEndian(), + Data.getAddressSize()); + switch (IT) { + case InfoType::EndOfList: + Done = true; + break; + + case InfoType::LineTableInfo: + if (Expected<LineTable> LT = LineTable::decode(InfoData, BaseAddr)) + FI.OptLineTable = 
std::move(LT.get()); + else + return LT.takeError(); + break; + + case InfoType::InlineInfo: + if (Expected<InlineInfo> II = InlineInfo::decode(InfoData, BaseAddr)) + FI.Inline = std::move(II.get()); + else + return II.takeError(); + break; + + default: + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": unsupported InfoType %u", + Offset-8, IT); + } + Offset += InfoLength; + } + return std::move(FI); +} + +llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const { + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid FunctionInfo object"); + // Align FunctionInfo data to a 4 byte alignment. + O.alignTo(sizeof(uint32_t)); + const uint64_t FuncInfoOffset = O.tell(); + // Write the size in bytes of this function as a uint32_t. This can be zero + // if we just have a symbol from a symbol table and that symbol has no size. + O.writeU32(size()); + // Write the name of this function as a uint32_t string table offset. + O.writeU32(Name); + + if (OptLineTable.hasValue()) { + O.writeU32(InfoType::LineTableInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the LineTable out with the number of bytes that were written. + O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = OptLineTable->encode(O, Range.Start); + if (err) + return std::move(err); + const off_t Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "LineTable length is greater than UINT32_MAX"); + // Fixup the size of the LineTable data with the correct size. + O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); + } + + // Write out the inline function info if we have any and if it is valid. + if (Inline.hasValue()) { + O.writeU32(InfoType::InlineInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the InlineInfo out with the number of bytes that were written. 
+ O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = Inline->encode(O, Range.Start); + if (err) + return std::move(err); + const off_t Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "InlineInfo length is greater than UINT32_MAX"); + // Fixup the size of the InlineInfo data with the correct size. + O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); + } + + // Terminate the data chunks with an end of list with zero size + O.writeU32(InfoType::EndOfList); + O.writeU32(0); + return FuncInfoOffset; +} Index: unittests/DebugInfo/GSYM/GSYMTest.cpp =================================================================== --- unittests/DebugInfo/GSYM/GSYMTest.cpp +++ unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -24,6 +24,23 @@ using namespace llvm; using namespace gsym; +void checkError(ArrayRef<std::string> ExpectedMsgs, Error Err) { + ASSERT_TRUE(Err.operator bool()); + size_t WhichMsg = 0; + Error Remaining = + handleErrors(std::move(Err), [&](const ErrorInfoBase &Actual) { + ASSERT_LT(WhichMsg, ExpectedMsgs.size()); + // Use .str(), because googletest doesn't visualise a StringRef + // properly. + EXPECT_EQ(Actual.message(), ExpectedMsgs[WhichMsg++]); + }); + EXPECT_EQ(WhichMsg, ExpectedMsgs.size()); + EXPECT_FALSE(Remaining); +} + +void checkError(std::string ExpectedMsg, Error Err) { + checkError(ArrayRef<std::string>{ExpectedMsg}, std::move(Err)); +} TEST(GSYMTest, TestFileEntry) { // Make sure default constructed GSYM FileEntry has zeroes in the // directory and basename string table indexes. @@ -137,24 +154,162 @@ EXPECT_LT(FIWithLines, FIWithLinesWithHigherAddress); } -void checkError(ArrayRef<std::string> ExpectedMsgs, Error Err) { - ASSERT_TRUE(Err.operator bool()); - size_t WhichMsg = 0; - Error Remaining = - handleErrors(std::move(Err), [&](const ErrorInfoBase &Actual) { - ASSERT_LT(WhichMsg, ExpectedMsgs.size()); - // Use .str(), because googletest doesn't visualise a StringRef - // properly. 
- EXPECT_EQ(Actual.message(), ExpectedMsgs[WhichMsg++]); - }); - EXPECT_EQ(WhichMsg, ExpectedMsgs.size()); - EXPECT_FALSE(Remaining); +static void TestFunctionInfoDecodeError(llvm::support::endianness ByteOrder, + std::string Bytes, + const uint64_t BaseAddr, + std::string ExpectedErrorMsg) { + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + llvm::Expected<FunctionInfo> Decoded = FunctionInfo::decode(Data, BaseAddr); + // Make sure decoding fails. + ASSERT_FALSE((bool)Decoded); + // Make sure the error message matches the one we expect. + checkError(ExpectedErrorMsg, Decoded.takeError()); } -void checkError(std::string ExpectedMsg, Error Err) { - checkError(ArrayRef<std::string>{ExpectedMsg}, std::move(Err)); +TEST(GSYMTest, TestFunctionInfoDecodeErrors) { + // Test decoding FunctionInfo objects that ensure we report an appropriate + // error message. + const llvm::support::endianness ByteOrder = llvm::support::little; + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + const uint64_t BaseAddr = 0x100; + TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000000: missing FunctionInfo Size"); + FW.writeU32(0x100); // Function size. + TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000004: missing FunctionInfo Name"); + // Write out an invalid Name string table offset of zero. + FW.writeU32(0); + TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000004: invalid FunctionInfo Name value 0x00000000"); + // Modify the Name to be 0x00000001, which is a valid value. + FW.fixup32(0x00000001, 4); + TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000008: missing FunctionInfo InfoType value"); + auto FixupOffset = FW.tell(); + FW.writeU32(1); // InfoType::LineTableInfo. 
+ TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x0000000c: missing FunctionInfo InfoType length"); + FW.fixup32(4, FixupOffset); // Write an invalid InfoType enumeration value + FW.writeU32(0); // LineTableInfo InfoType data length. + TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000008: unsupported InfoType 4"); } +static void TestFunctionInfoEncodeError(llvm::support::endianness ByteOrder, + const FunctionInfo &FI, + std::string ExpectedErrorMsg) { + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + Expected<uint64_t> ExpectedOffset = FI.encode(FW); + ASSERT_FALSE(ExpectedOffset); + checkError(ExpectedErrorMsg, ExpectedOffset.takeError()); +} + +TEST(GSYMTest, TestFunctionInfoEncodeErrors) { + const uint64_t FuncAddr = 0x1000; + const uint64_t FuncSize = 0x100; + const uint32_t InvalidName = 0; + const uint32_t ValidName = 1; + FunctionInfo InvalidNameFI(FuncAddr, FuncSize, InvalidName); + TestFunctionInfoEncodeError(llvm::support::little, InvalidNameFI, + "attempted to encode invalid FunctionInfo object"); + + FunctionInfo InvalidLineTableFI(FuncAddr, FuncSize, ValidName); + // Empty line tables are not valid. Verify if the encoding of anything + // in our line table fails, that we see the error get propagated. + InvalidLineTableFI.OptLineTable = LineTable(); + TestFunctionInfoEncodeError(llvm::support::little, InvalidLineTableFI, + "attempted to encode invalid LineTable object"); + + FunctionInfo InvalidInlineInfoFI(FuncAddr, FuncSize, ValidName); + // Empty inline infos are not valid. Verify if the encoding of anything + // in our inline info fails, that we see the error get propagated. 
+ InvalidInlineInfoFI.Inline = InlineInfo(); + TestFunctionInfoEncodeError(llvm::support::little, InvalidInlineInfoFI, + "attempted to encode invalid InlineInfo object"); +} + +static void TestFunctionInfoEncodeDecode(llvm::support::endianness ByteOrder, + const FunctionInfo &FI) { + // Test encoding and decoding FunctionInfo objects. + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Expected<uint64_t> ExpectedOffset = FI.encode(FW); + ASSERT_TRUE(ExpectedOffset.operator bool()); + // Verify we got the encoded offset back from the encode function. + ASSERT_EQ(ExpectedOffset.get(), 0ULL); + std::string Bytes(OutStrm.str()); + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + llvm::Expected<FunctionInfo> Decoded = FunctionInfo::decode(Data, + FI.Range.Start); + // Make sure decoding succeeded. + ASSERT_TRUE((bool)Decoded); + // Make sure decoded object is the same as the one we encoded. + EXPECT_EQ(FI, Decoded.get()); +} + + +TEST(GSYMTest, TestFunctionInfoEncoding) { + constexpr uint64_t FuncAddr = 0x1000; + constexpr uint64_t FuncSize = 0x100; + constexpr uint32_t FuncName = 1; + constexpr uint32_t FileIdx = 1; + // Make sure that we can encode and decode a FunctionInfo with no line table + // or inline info. 
+ FunctionInfo FI(FuncAddr, FuncSize, FuncName); + TestFunctionInfoEncodeDecode(llvm::support::little, FI); + TestFunctionInfoEncodeDecode(llvm::support::big, FI); + + auto AddLinesLambda = [](FunctionInfo &FI) { + FI.OptLineTable = LineTable(); + LineEntry Line0(FuncAddr+0x000, FileIdx, 10); + LineEntry Line1(FuncAddr+0x010, FileIdx, 11); + LineEntry Line2(FuncAddr+0x100, FileIdx, 1000); + FI.OptLineTable->push(Line0); + FI.OptLineTable->push(Line1); + FI.OptLineTable->push(Line2); + }; + + auto AddInlineLambda = [](FunctionInfo &FI) { + FI.Inline = InlineInfo(); + FI.Inline->Ranges.insert(AddressRange(FuncAddr, FuncAddr+FuncSize)); + InlineInfo Inline1; + Inline1.Ranges.insert(AddressRange(FuncAddr+0x10, FuncAddr+0x30)); + Inline1.Name = 1; + Inline1.CallFile = 1; + Inline1.CallLine = 11; + FI.Inline->Children.push_back(Inline1); + }; + + // Make sure that we can encode and decode a FunctionInfo with a line table + // and no inline info. + FunctionInfo FILines(FuncAddr, FuncSize, FuncName); + AddLinesLambda(FILines); + TestFunctionInfoEncodeDecode(llvm::support::little, FILines); + TestFunctionInfoEncodeDecode(llvm::support::big, FILines); + + // Make sure that we can encode and decode a FunctionInfo with no line table + // and with inline info. + FunctionInfo FIInline(FuncAddr, FuncSize, FuncName); + AddInlineLambda(FIInline); + TestFunctionInfoEncodeDecode(llvm::support::little, FIInline); + TestFunctionInfoEncodeDecode(llvm::support::big, FIInline); + + // Make sure that we can encode and decode a FunctionInfo with both a line + // table and inline info. + FunctionInfo FIBoth(FuncAddr, FuncSize, FuncName); + AddLinesLambda(FIBoth); + AddInlineLambda(FIBoth); + TestFunctionInfoEncodeDecode(llvm::support::little, FIBoth); + TestFunctionInfoEncodeDecode(llvm::support::big, FIBoth); +} + static void TestInlineInfoEncodeDecode(llvm::support::endianness ByteOrder, const InlineInfo &Inline) { // Test encoding and decoding InlineInfo objects