Index: include/llvm/DebugInfo/GSYM/InlineInfo.h =================================================================== --- include/llvm/DebugInfo/GSYM/InlineInfo.h +++ include/llvm/DebugInfo/GSYM/InlineInfo.h @@ -11,6 +11,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/Support/Error.h" #include #include @@ -30,6 +31,30 @@ /// Any clients that encode information will need to ensure the ranges are /// all contined correctly or lookups could fail. Add ranges in these objects /// must be contained in the top level FunctionInfo address ranges as well. +/// +/// ENCODING +/// +/// When saved to disk, the inline info encodes all ranges to be relative to +/// a parent address range. This will be the FunctionInfo's start address if +/// the InlineInfo is directly contained in a FunctionInfo, or the start +/// address of the containing parent InlineInfo's first "Ranges" member. This +/// allows address ranges to be efficiently encoded using ULEB128 encodings as +/// we encode the offset and size of each range instead of full addresses. This +/// also makes any encoded addresses easy to relocate as we just need to +/// relocate the FunctionInfo's start address. +/// +/// - The AddressRanges member "Ranges" is encoded using an appropriate base +/// address as described above. +/// - UINT8 boolean value that specifies if the InlineInfo object has children. +/// - UINT32 string table offset that points to the name of the inline +/// function. +/// - ULEB128 integer that specifies the file of the call site that called +/// this function. +/// - ULEB128 integer that specifies the source line of the call site that +/// called this function. +/// - if this object has children, encode each child InlineInfo using +/// the first address range's start address as the base address. +/// struct InlineInfo { uint32_t Name; ///< String table offset in the string table. 
@@ -61,6 +86,37 @@ /// \returns optional vector of InlineInfo objects that describe the /// inline call stack for a given address, false otherwise. llvm::Optional getInlineStack(uint64_t Addr) const; + + /// Decode an InlineInfo object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the InlineInfo object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The base address to use when decoding all address ranges. + /// This will be the FunctionInfo's start address if this object is directly + /// contained in a FunctionInfo object, or the start address of the first + /// address range in an InlineInfo object if this object is a child of + /// another InlineInfo object. + /// \returns An InlineInfo or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + + /// Encode this InlineInfo object into a FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \param BaseAddr The base address to use when encoding all address ranges. + /// This will be the FunctionInfo's start address if this object is directly + /// contained in a FunctionInfo object, or the start address of the first + /// address range in an InlineInfo object if this object is a child of + /// another InlineInfo object. + /// + /// \returns An error object that indicates success or failure of the + /// encoding process. 
+ llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const; }; inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) { Index: include/llvm/DebugInfo/GSYM/Range.h =================================================================== --- include/llvm/DebugInfo/GSYM/Range.h +++ include/llvm/DebugInfo/GSYM/Range.h @@ -80,6 +80,7 @@ void clear() { Ranges.clear(); } bool empty() const { return Ranges.empty(); } bool contains(uint64_t Addr) const; + bool contains(AddressRange Range) const; void insert(AddressRange Range); size_t size() const { return Ranges.size(); } bool operator==(const AddressRanges &RHS) const { Index: lib/DebugInfo/GSYM/InlineInfo.cpp =================================================================== --- lib/DebugInfo/GSYM/InlineInfo.cpp +++ lib/DebugInfo/GSYM/InlineInfo.cpp @@ -8,7 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -57,3 +59,102 @@ return Result; return llvm::None; } + +/// Decode an InlineInfo in Data at the specified offset. +/// +/// A local helper function to decode InlineInfo objects. This function is +/// called recursively when parsing child InlineInfo objects. +/// +/// \param Data The data extractor to decode from. +/// \param Offset The offset within \a Data to decode from. It is passed by +/// reference and advanced as data is read. +/// \param BaseAddr The base address to use when decoding address ranges. +/// \returns An InlineInfo or an error describing the issue that was +/// encountered during decoding. 
+static llvm::Expected decode(DataExtractor &Data, uint64_t &Offset, + uint64_t BaseAddr) { + InlineInfo Inline; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset); + Inline.Ranges.decode(Data, BaseAddr, Offset); + if (Inline.Ranges.empty()) + return Inline; + if (!Data.isValidOffsetForDataOfSize(Offset, 1)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint8_t indicating children", + Offset); + bool HasChildren = Data.getU8(&Offset) != 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint32_t for name", Offset); + Inline.Name = Data.getU32(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call file", Offset); + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call line", Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + // Child address ranges are encoded relative to the first address in the + // parent InlineInfo object. + const auto ChildBaseAddr = Inline.Ranges[0].Start; + while (true) { + llvm::Expected Child = decode(Data, Offset, ChildBaseAddr); + if (!Child) + return Child.takeError(); + // InlineInfo with empty Ranges terminates a child sibling chain. 
+ if (Child.get().Ranges.empty()) + break; + Inline.Children.emplace_back(std::move(*Child)); + } + } + return Inline; +} + +llvm::Expected InlineInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + uint64_t Offset = 0; + return ::decode(Data, Offset, BaseAddr); +} + +llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const { + // Users must verify the InlineInfo is valid prior to calling this function. + // We don't want to emit any InlineInfo objects if they are not valid since + // it will waste space in the GSYM file. + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid InlineInfo object"); + Ranges.encode(O, BaseAddr); + bool HasChildren = !Children.empty(); + O.writeU8(HasChildren); + O.writeU32(Name); + O.writeULEB(CallFile); + O.writeULEB(CallLine); + if (HasChildren) { + // Child address ranges are encoded as relative to the first + // address in the Ranges for this object. This keeps the offsets + // small and allows for efficient encoding using ULEB offsets. + const uint64_t ChildBaseAddr = Ranges[0].Start; + for (const auto &Child : Children) { + // Make sure all child address ranges are contained in the parent address + // ranges. + for (const auto &ChildRange: Child.Ranges) { + if (!Ranges.contains(ChildRange)) + return createStringError(std::errc::invalid_argument, + "child range not contained in parent"); + } + llvm::Error Error = Child.encode(O, ChildBaseAddr); + if (Error) + return Error; + } + + // Terminate child sibling chain by emitting a zero. This zero will cause + // the decode() helper above to see empty Ranges and stop the decoding + // of child InlineInfo objects that are siblings. 
+ O.writeULEB(0); + } + return Error::success(); +} Index: lib/DebugInfo/GSYM/Range.cpp =================================================================== --- lib/DebugInfo/GSYM/Range.cpp +++ lib/DebugInfo/GSYM/Range.cpp @@ -42,6 +42,17 @@ return It != Ranges.begin() && Addr < It[-1].End; } +bool AddressRanges::contains(AddressRange Range) const { + if (Range.size() == 0) + return false; + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.Start <= Range.Start; }); + if (It == Ranges.begin()) + return false; + return Range.Start < It[-1].End && Range.End <= It[-1].End; +} + raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) { return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")"; } Index: unittests/DebugInfo/GSYM/GSYMTest.cpp =================================================================== --- unittests/DebugInfo/GSYM/GSYMTest.cpp +++ unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -134,6 +134,67 @@ EXPECT_LT(FIWithLines, FIWithLinesWithHigherAddress); } +void checkError(ArrayRef ExpectedMsgs, Error Err) { + ASSERT_TRUE(Err.operator bool()); + size_t WhichMsg = 0; + Error Remaining = + handleErrors(std::move(Err), [&](const ErrorInfoBase &Actual) { + ASSERT_LT(WhichMsg, ExpectedMsgs.size()); + // Use .str(), because googletest doesn't visualise a StringRef + // properly. 
+ EXPECT_EQ(Actual.message(), ExpectedMsgs[WhichMsg++]); + }); + EXPECT_EQ(WhichMsg, ExpectedMsgs.size()); + EXPECT_FALSE(Remaining); +} + +void checkError(std::string ExpectedMsg, Error Err) { + checkError(ArrayRef{ExpectedMsg}, std::move(Err)); +} + +static void TestInlineInfoEncodeDecode(llvm::support::endianness ByteOrder, + const InlineInfo &Inline) { + // Test encoding and decoding InlineInfo objects + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + const uint64_t BaseAddr = Inline.Ranges[0].Start; + llvm::Error Err = Inline.encode(FW, BaseAddr); + ASSERT_FALSE(Err); + std::string Bytes(OutStrm.str()); + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + llvm::Expected Decoded = InlineInfo::decode(Data, BaseAddr); + // Make sure decoding succeeded. + ASSERT_TRUE((bool)Decoded); + // Make sure decoded object is the same as the one we encoded. + EXPECT_EQ(Inline, Decoded.get()); +} + +static void TestInlineInfoDecodeError(llvm::support::endianness ByteOrder, + std::string Bytes, + const uint64_t BaseAddr, + std::string ExpectedErrorMsg) { + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + llvm::Expected Decoded = InlineInfo::decode(Data, BaseAddr); + // Make sure decoding fails. + ASSERT_FALSE((bool)Decoded); + // Make sure the error message matches the one we expect. + checkError(ExpectedErrorMsg, Decoded.takeError()); +} + +static void TestInlineInfoEncodeError(llvm::support::endianness ByteOrder, + const InlineInfo &Inline, + std::string ExpectedErrorMsg) { + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + const uint64_t BaseAddr = Inline.Ranges.empty() ? 0 : Inline.Ranges[0].Start; + llvm::Error Err = Inline.encode(FW, BaseAddr); + checkError(ExpectedErrorMsg, std::move(Err)); +} + TEST(GSYMTest, TestInlineInfo) { // Test InlineInfo structs. 
InlineInfo II; @@ -226,8 +287,71 @@ ASSERT_EQ(InlineInfos->size(), 2u); ASSERT_EQ(*InlineInfos->at(0), Inline1Sub2); ASSERT_EQ(*InlineInfos->at(1), Inline1); + + // Test encoding and decoding InlineInfo objects + TestInlineInfoEncodeDecode(llvm::support::little, Root); + TestInlineInfoEncodeDecode(llvm::support::big, Root); } +TEST(GSYMTest, TestInlineInfoEncodeErrors) { + // Test InlineInfo encoding errors. + + // Test that we get an error when trying to encode an InlineInfo object + // that has no ranges. + InlineInfo Empty; + std::string EmptyErr("attempted to encode invalid InlineInfo object"); + TestInlineInfoEncodeError(llvm::support::little, Empty, EmptyErr); + TestInlineInfoEncodeError(llvm::support::big, Empty, EmptyErr); + + // Verify that we get an error trying to encode an InlineInfo object that has + // a child InlineInfo that has no ranges. + InlineInfo ContainsEmpty; + ContainsEmpty.Ranges.insert({0x100,200}); + ContainsEmpty.Children.push_back(Empty); + TestInlineInfoEncodeError(llvm::support::little, ContainsEmpty, EmptyErr); + TestInlineInfoEncodeError(llvm::support::big, ContainsEmpty, EmptyErr); + + // Verify that we get an error trying to encode an InlineInfo object that has + // a child whose address range is not contained in the parent address range. + InlineInfo ChildNotContained; + std::string ChildNotContainedErr("child range not contained in parent"); + ChildNotContained.Ranges.insert({0x100,200}); + InlineInfo ChildNotContainedChild; + ChildNotContainedChild.Ranges.insert({0x200,300}); + ChildNotContained.Children.push_back(ChildNotContainedChild); + TestInlineInfoEncodeError(llvm::support::little, ChildNotContained, + ChildNotContainedErr); + TestInlineInfoEncodeError(llvm::support::big, ChildNotContained, + ChildNotContainedErr); + +} + +TEST(GSYMTest, TestInlineInfoDecodeErrors) { + // Test decoding InlineInfo objects that ensure we report an appropriate + // error message. 
+ const llvm::support::endianness ByteOrder = llvm::support::little; + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + const uint64_t BaseAddr = 0x100; + TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000000: missing InlineInfo address ranges data"); + AddressRanges Ranges; + Ranges.insert({BaseAddr, BaseAddr+0x100}); + Ranges.encode(FW, BaseAddr); + TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000004: missing InlineInfo uint8_t indicating children"); + FW.writeU8(0); + TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000005: missing InlineInfo uint32_t for name"); + FW.writeU32(0); + TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x00000009: missing ULEB128 for InlineInfo call file"); + FW.writeU8(0); + TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, + "0x0000000a: missing ULEB128 for InlineInfo call line"); +} + TEST(GSYMTest, TestLineEntry) { // test llvm::gsym::LineEntry structs. const uint64_t ValidAddr = 0x1000; @@ -334,6 +458,18 @@ EXPECT_FALSE(Ranges.contains(0x5000 + 1)); EXPECT_FALSE(Ranges.contains(UINT64_MAX)); + EXPECT_FALSE(Ranges.contains(AddressRange())); + EXPECT_FALSE(Ranges.contains(AddressRange(0x1000-1, 0x1000))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x1000, 0x1000))); + EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x1000+1))); + EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x2000))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x1000, 0x2001))); + EXPECT_TRUE(Ranges.contains(AddressRange(0x2000, 0x3000))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x2000, 0x3001))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x3000, 0x3001))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x1500, 0x4500))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x5000, 0x5001))); + // Verify that intersecting ranges get combined Ranges.clear(); Ranges.insert(AddressRange(0x1100, 0x1F00));