diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h --- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -12,6 +12,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/LookupResult.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" #include @@ -21,6 +22,7 @@ class raw_ostream; namespace gsym { +class GsymReader; /// Function information in GSYM files encodes information for one contiguous /// address range. If a function has discontiguous address ranges, they will /// need to be encoded using multiple FunctionInfo objects. @@ -140,6 +142,33 @@ /// function info that was successfully written into the stream. llvm::Expected encode(FileWriter &O) const; + + /// Lookup an address within a FunctionInfo object's data stream. + /// + /// Instead of decoding an entire FunctionInfo object when doing lookups, + /// we can decode only the information we need from the FunctionInfo's data + /// for the specific address. The lookup result information is returned as + /// a LookupResult. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param GR The GSYM reader that contains the string and file table that + /// will be used to fill in information in the returned result. + /// + /// \param FuncAddr The function start address decoded from the GsymReader. + /// + /// \param Addr The address to lookup. + /// + /// \returns An LookupResult or an error describing the issue that was + /// encountered during decoding. An error should only be returned if the + /// address is not contained in the FunctionInfo or if the data is corrupted. 
+ static llvm::Expected lookup(DataExtractor &Data, + const GsymReader &GR, + uint64_t FuncAddr, + uint64_t Addr); + uint64_t startAddress() const { return Range.Start; } uint64_t endAddress() const { return Range.End; } uint64_t size() const { return Range.size(); } diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h --- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -1,9 +1,8 @@ //===- GsymReader.h ---------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -94,28 +93,45 @@ /// Get the full function info for an address. /// + /// This should be called when a client will store a copy of the complete + /// FunctionInfo for a given address. For one off lookups, use the lookup() + /// function below. + /// + /// Symbolication server processes might want to parse the entire function + /// info for a given address and cache it if the process stays around to + /// service many symbolication addresses, like for parsing profiling + /// information. + /// /// \param Addr A virtual address from the orignal object file to lookup. + /// /// \returns An expected FunctionInfo that contains the function info object /// or an error object that indicates reason for failing to lookup the - /// address, + /// address. llvm::Expected getFunctionInfo(uint64_t Addr) const; + /// Lookup an address in a GSYM. + /// + /// Lookup just the information needed for a specific address \a Addr. 
This + /// function is faster than calling getFunctionInfo() as it will only return + /// information that pertains to \a Addr and allows the parsing to skip any + /// extra information encoded for other addresses. For example the line table + /// parsing can stop when a matching LineEntry has been found, and the + /// InlineInfo can stop parsing early once a match has been found and also + /// skip information that doesn't match. This avoids memory allocations and + /// is much faster for lookups. + /// + /// \param Addr A virtual address from the original object file to lookup. + /// \returns An expected LookupResult that contains only the information + /// needed for the current address, or an error object that indicates reason + /// for failing to lookup the address. + llvm::Expected lookup(uint64_t Addr) const; + /// Get a string from the string table. /// /// \param Offset The string table offset for the string to retrieve. /// \returns The string from the strin table. StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } -protected: - /// Gets an address from the address table. - /// - /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. - /// - /// \param Index A index into the address table. - /// \returns A resolved virtual address for adddress in the address table - /// or llvm::None if Index is out of bounds. - Optional getAddress(size_t Index) const; - /// Get the a file entry for the suppplied file index. /// /// Used to convert any file indexes in the FunctionInfo data back into @@ -131,6 +147,16 @@ return llvm::None; } +protected: + /// Gets an address from the address table. + /// + /// Addresses are stored as offsets from the gsym::Header::BaseAddress. + /// + /// \param Index An index into the address table. + /// \returns A resolved virtual address for address in the address table + /// or llvm::None if Index is out of bounds. 
+ Optional getAddress(size_t Index) const; + /// Get an appropriate address info offsets array. /// /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 diff --git a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h --- a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h @@ -10,6 +10,8 @@ #define LLVM_DEBUGINFO_GSYM_INLINEINFO_H #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/LookupResult.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/Support/Error.h" #include @@ -21,6 +23,7 @@ namespace gsym { +class GsymReader; /// Inline information stores the name of the inline function along with /// an array of address ranges. It also stores the call file and call line /// that called this inline function. This allows us to unwind inline call @@ -74,6 +77,52 @@ using InlineArray = std::vector; + /// Lookup a single address within the inline info data. + /// + /// Clients have the option to decode an entire InlineInfo object (using + /// InlineInfo::decode() ) or just find the matching inline info using this + /// function. The benefit of using this function is that only the information + /// needed for the lookup will be extracted, other info can be skipped and + /// parsing can stop as soon as the deepest match is found. This allows + /// symbolication tools to be fast and efficient and avoid allocation costs + /// when doing lookups. + /// + /// This function will augment the SourceLocations array \a SrcLocs with any + /// inline information that pertains to \a Addr. If no inline information + /// exists for \a Addr, then \a SrcLocs will be left untouched. 
If there is + /// inline information for \a Addr, then \a SrcLocs will be modified to + /// contain the deepest inline function's SourceLocation at index zero + /// in the array and proceed up to the concrete function source file and + /// line at the end of the array. + /// + /// \param GR The GSYM reader that contains the string and file table that + /// will be used to fill in the source locations. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the InlineInfo object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The base address to use when decoding the inline information. + /// This will be the FunctionInfo's start address and will be used to + /// decode the correct addresses for the inline information. + /// + /// \param Addr The address to lookup. + /// + /// \param SrcLocs The inline source locations that match \a Addr. This + /// array must be initialized with the matching line entry + /// from the line table upon entry. The name of the concrete + /// function must be supplied since it will get pushed to + /// the last SourceLocation entry and the inline information + /// will fill in the source file and line from the inline + /// information. + /// + /// \returns An error if the inline information is corrupt, or + /// Error::success() for all other cases, even when no information + /// is added to \a SrcLocs. 
+ static llvm::Error lookup(const GsymReader &GR, DataExtractor &Data, + uint64_t BaseAddr, uint64_t Addr, + SourceLocations &SrcLocs); + /// Lookup an address in the InlineInfo object /// /// This function is used to symbolicate an inline call stack and can diff --git a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h --- a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h @@ -119,8 +119,25 @@ typedef std::vector Collection; Collection Lines; ///< All line entries in the line table. public: - static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr, - uint64_t Addr); + /// Lookup a single address within a line table's data. + /// + /// Clients have the option to decode an entire line table using + /// LineTable::decode() or just find a single matching entry using this + /// function. The benefit of using this function is that parsed LineEntry + /// objects that do not match will not be stored in an array. This will avoid + /// memory allocation costs and parsing can stop once a match has been found. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the LineTable object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The base address to use when decoding the line table. + /// This will be the FunctionInfo's start address and will be used to + /// initialize the line table row prior to parsing any opcodes. + /// + /// \returns An LineEntry object if a match is found, error otherwise. + static Expected lookup(DataExtractor &Data, uint64_t BaseAddr, + uint64_t Addr); /// Decode an LineTable object from a binary data stream. 
/// diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h @@ -0,0 +1,61 @@ +//===- LookupResult.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H +#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H + +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/ADT/StringRef.h" +#include +#include + +namespace llvm { +class raw_ostream; +namespace gsym { +struct FileEntry; + +struct SourceLocation { + StringRef Name; ///< Function or symbol name. + StringRef Dir; ///< Line entry source file directory path. + StringRef Base; ///< Line entry source file basename. + uint32_t Line = 0; ///< Source file line number. +}; + +inline bool operator==(const SourceLocation &LHS, const SourceLocation &RHS) { + return LHS.Name == RHS.Name && LHS.Dir == RHS.Dir && + LHS.Base == RHS.Base && LHS.Line == RHS.Line; +} + +raw_ostream &operator<<(raw_ostream &OS, const SourceLocation &R); + +using SourceLocations = std::vector; + + +struct LookupResult { + uint64_t LookupAddr = 0; ///< The address that this lookup pertains to. + AddressRange FuncRange; ///< The concrete function address range. + StringRef FuncName; ///< The concrete function name that contains LookupAddr. + /// The source locations that match this address. This information will only + /// be filled in if the FunctionInfo contains a line table. If an address is + /// for a concrete function with no inlined functions, this array will have + /// one entry. 
If an address points to an inline function, there will be one + /// SourceLocation for each inlined function with the last entry pointing to + /// the concrete function itself. This allows one address to generate + /// multiple locations and allows unwinding of inline call stacks. The + /// deepest inline function will appear at index zero in the source locations + /// array, and the concrete function will appear at the end of the array. + SourceLocations Locations; + std::string getSourceFile(uint32_t Index) const; +}; + +raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/Range.h b/llvm/include/llvm/DebugInfo/GSYM/Range.h --- a/llvm/include/llvm/DebugInfo/GSYM/Range.h +++ b/llvm/include/llvm/DebugInfo/GSYM/Range.h @@ -61,6 +61,14 @@ void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); void encode(FileWriter &O, uint64_t BaseAddr) const; /// @} + + /// Skip an address range object in the specified data at the specified + /// offset. + /// + /// \param Data The binary stream to read the data from. + /// + /// \param Offset The byte offset within \a Data. + static void skip(DataExtractor &Data, uint64_t &Offset); }; raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R); @@ -100,6 +108,16 @@ void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); void encode(FileWriter &O, uint64_t BaseAddr) const; /// @} + + /// Skip an address ranges object in the specified data at the specified + /// offset. + /// + /// \param Data The binary stream to read the data from. + /// + /// \param Offset The byte offset within \a Data. + /// + /// \returns The number of address ranges that were skipped. 
+ static uint64_t skip(DataExtractor &Data, uint64_t &Offset); }; raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR); diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt --- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt +++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt @@ -6,6 +6,7 @@ GsymReader.cpp InlineInfo.cpp LineTable.cpp + LookupResult.cpp Range.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/GSYM/FunctionInfo.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" #include "llvm/DebugInfo/GSYM/LineTable.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/Support/DataExtractor.h" @@ -145,3 +146,104 @@ O.writeU32(0); return FuncInfoOffset; } + + +llvm::Expected FunctionInfo::lookup(DataExtractor &Data, + const GsymReader &GR, + uint64_t FuncAddr, + uint64_t Addr) { + LookupResult LR; + LR.LookupAddr = Addr; + LR.FuncRange.Start = FuncAddr; + uint64_t Offset = 0; + LR.FuncRange.End = FuncAddr + Data.getU32(&Offset); + uint32_t NameOffset = Data.getU32(&Offset); + // The "lookup" function doesn't report errors as accurately as the "decode" + // function as it is meant to be fast. For more accurate errors we could call + // "decode". + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "FunctionInfo data is truncated"); + // This function will be called with the result of a binary search of the + // address table, we must still make sure the address does not fall into a + // gap between functions or after the last function. 
+ if (Addr >= LR.FuncRange.End) + return createStringError(std::errc::io_error, + "address 0x%" PRIx64 " is not in GSYM", Addr); + + if (NameOffset == 0) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000", + Offset - 4); + LR.FuncName = GR.getString(NameOffset); + bool Done = false; + Optional LineEntry; + Optional InlineInfoData; + while (!Done) { + if (!Data.isValidOffsetForDataOfSize(Offset, 8)) + return createStringError(std::errc::io_error, + "FunctionInfo data is truncated"); + const uint32_t IT = Data.getU32(&Offset); + const uint32_t InfoLength = Data.getU32(&Offset); + const StringRef InfoBytes = Data.getData().substr(Offset, InfoLength); + if (InfoLength != InfoBytes.size()) + return createStringError(std::errc::io_error, + "FunctionInfo data is truncated"); + DataExtractor InfoData(InfoBytes, Data.isLittleEndian(), + Data.getAddressSize()); + switch (IT) { + case InfoType::EndOfList: + Done = true; + break; + + case InfoType::LineTableInfo: + if (auto ExpectedLE = LineTable::lookup(InfoData, FuncAddr, Addr)) + LineEntry = ExpectedLE.get(); + else + return ExpectedLE.takeError(); + break; + + case InfoType::InlineInfo: + // We will parse the inline info after our line table, but only if + // we have a line entry. + InlineInfoData = InfoData; + break; + + default: + break; + } + Offset += InfoLength; + } + + if (!LineEntry) { + // We don't have a valid line entry for our address, fill in our source + // location as best we can and return. 
+ SourceLocation SrcLoc; + SrcLoc.Name = LR.FuncName; + LR.Locations.push_back(SrcLoc); + return LR; + } + + Optional LineEntryFile = GR.getFile(LineEntry->File); + if (!LineEntryFile) + return createStringError(std::errc::invalid_argument, + "failed to extract file[%" PRIu32 "]", + LineEntry->File); + + SourceLocation SrcLoc; + SrcLoc.Name = LR.FuncName; + SrcLoc.Dir = GR.getString(LineEntryFile->Dir); + SrcLoc.Base = GR.getString(LineEntryFile->Base); + SrcLoc.Line = LineEntry->Line; + LR.Locations.push_back(SrcLoc); + // If we don't have inline information, we are done. + if (!InlineInfoData) + return LR; + // We have inline information. Try to augment the lookup result with this + // data. + llvm::Error Err = InlineInfo::lookup(GR, *InlineInfoData, FuncAddr, Addr, + LR.Locations); + if (Err) + return std::move(Err); + return LR; +} diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -1,9 +1,8 @@ //===- GsymReader.cpp -----------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -263,3 +262,18 @@ "failed to extract address[%" PRIu64 "]", *AddressIndex); } + +llvm::Expected GsymReader::lookup(uint64_t Addr) const { + Expected AddressIndex = getAddressIndex(Addr); + if (!AddressIndex) + return AddressIndex.takeError(); + // Address info offsets size should have been checked in parse(). 
+ assert(*AddressIndex < AddrInfoOffsets.size()); + auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; + DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4); + if (Optional OptAddr = getAddress(*AddressIndex)) + return FunctionInfo::lookup(Data, *this, *OptAddr, Addr); + return createStringError(std::errc::invalid_argument, + "failed to extract address[%" PRIu64 "]", + *AddressIndex); +} diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp --- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -1,14 +1,14 @@ //===- InlineInfo.cpp -------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/Support/DataExtractor.h" #include @@ -60,6 +60,108 @@ return llvm::None; } +/// Skip an InlineInfo object in the specified data at the specified offset. +/// +/// Used during the InlineInfo::lookup() call to quickly skip child InlineInfo +/// objects where the address isn't contained in the InlineInfo object +/// or its children. This avoids allocations by not appending child InlineInfo +/// objects to the InlineInfo::Children array. +/// +/// \param Data The binary stream to read the data from. +/// +/// \param Offset The byte offset within \a Data. +/// +/// \param SkippedRanges If true, address ranges have already been skipped. 
+ +static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) { + if (!SkippedRanges) { + if (AddressRanges::skip(Data, Offset) == 0) + return false; + } + bool HasChildren = Data.getU8(&Offset) != 0; + Data.getU32(&Offset); // Skip Inline.Name. + Data.getULEB128(&Offset); // Skip Inline.CallFile. + Data.getULEB128(&Offset); // Skip Inline.CallLine. + if (HasChildren) { + while (skip(Data, Offset, false /* SkippedRanges */)) + /* Do nothing */; + } + // We skipped a valid InlineInfo. + return true; +} + +/// A lookup helper function. +/// +/// Used during the InlineInfo::lookup() call to quickly only parse an +/// InlineInfo object if the address falls within this object. This avoids +/// allocations by not appending child InlineInfo objects to the +/// InlineInfo::Children array and also skips any InlineInfo objects that do +/// not contain the address we are looking up. +/// +/// \param Data The binary stream to read the data from. +/// +/// \param Offset The byte offset within \a Data. +/// +/// \param BaseAddr The address that the relative address range offsets are +/// relative to. + +static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, + uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, + llvm::Error &Err) { + InlineInfo Inline; + Inline.Ranges.decode(Data, BaseAddr, Offset); + if (Inline.Ranges.empty()) + return true; + // Check if the address is contained within the inline information, and if + // not, quickly skip this InlineInfo object and all its children. + if (!Inline.Ranges.contains(Addr)) { + skip(Data, Offset, true /* SkippedRanges */); + return false; + } + + // The address is contained within this InlineInfo, add the source + // location for this InlineInfo and any children that contain the address. 
+ bool HasChildren = Data.getU8(&Offset) != 0; + Inline.Name = Data.getU32(&Offset); + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + // Child address ranges are encoded relative to the first address in the + // parent InlineInfo object. + const auto ChildBaseAddr = Inline.Ranges[0].Start; + bool Done = false; + while (!Done) + Done = lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err); + } + + Optional CallFile = GR.getFile(Inline.CallFile); + if (!CallFile) { + Err = createStringError(std::errc::invalid_argument, + "failed to extract file[%" PRIu32 "]", + Inline.CallFile); + return false; + } + + SourceLocation SrcLoc; + SrcLoc.Name = SrcLocs.back().Name; + SrcLoc.Dir = GR.getString(CallFile->Dir); + SrcLoc.Base = GR.getString(CallFile->Base); + SrcLoc.Line = Inline.CallLine; + SrcLocs.back().Name = GR.getString(Inline.Name); + SrcLocs.push_back(SrcLoc); + return true; +} + +llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data, + uint64_t BaseAddr, uint64_t Addr, + SourceLocations &SrcLocs) { + // Call our recursive helper function starting at offset zero. + uint64_t Offset = 0; + llvm::Error Err = Error::success(); + ::lookup(GR, Data, Offset, BaseAddr, Addr, SrcLocs, Err); + return Err; +} + /// Decode an InlineInfo in Data at the specified offset. /// /// A local helper function to decode InlineInfo objects. This function is diff --git a/llvm/lib/DebugInfo/GSYM/LineTable.cpp b/llvm/lib/DebugInfo/GSYM/LineTable.cpp --- a/llvm/lib/DebugInfo/GSYM/LineTable.cpp +++ b/llvm/lib/DebugInfo/GSYM/LineTable.cpp @@ -262,8 +262,8 @@ // Parse the line table on the fly and find the row we are looking for. // We will need to determine if we need to cache the line table by calling // LineTable::parseAllEntries(...) or just call this function each time. -// There is a CPU vs memory tradeoff we will need to determine. 
-LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) { +// There is a CPU vs memory tradeoff we will need to determine. +Expected LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) { LineEntry Result; llvm::Error Err = parse(Data, BaseAddr, [Addr, &Result](const LineEntry &Row) -> bool { @@ -277,7 +277,13 @@ } return true; // Keep parsing till we find the right row. }); - return Result; + if (Err) + return std::move(Err); + if (Result.isValid()) + return Result; + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " is not in the line table", + Addr); } raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable <) { diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp @@ -0,0 +1,68 @@ +//===- LookupResult.cpp -------------------------------------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LookupResult.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace gsym; + +std::string LookupResult::getSourceFile(uint32_t Index) const { + std::string Fullpath; + if (Index < Locations.size()) { + if (!Locations[Index].Dir.empty()) { + if (Locations[Index].Base.empty()) { + Fullpath = Locations[Index].Dir; + } else { + llvm::SmallString<64> Storage; + llvm::sys::path::append(Storage, Locations[Index].Dir, + Locations[Index].Base); + Fullpath.assign(Storage.begin(), Storage.end()); + } + } else if (!Locations[Index].Base.empty()) + Fullpath = Locations[Index].Base; + } + return Fullpath; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const SourceLocation &SL) { + OS << SL.Name << " @ "; + if (!SL.Dir.empty()) { + OS << SL.Dir; + if (SL.Dir.contains('\\') && !SL.Dir.contains('/')) + OS << '\\'; + else + OS << '/'; + } + if (SL.Base.empty()) + OS << ""; + else + OS << SL.Base; + OS << ':' << SL.Line; + return OS; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) { + OS << HEX64(LR.LookupAddr) << ": "; + auto NumLocations = LR.Locations.size(); + for (size_t I = 0; I < NumLocations; ++I) { + if (I > 0) { + OS << '\n'; + OS.indent(20); + } + const bool IsInlined = I + 1 != NumLocations; + OS << LR.Locations[I]; + if (IsInlined) + OS << " [inlined]"; + } + OS << '\n'; + return OS; +} diff --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp --- a/llvm/lib/DebugInfo/GSYM/Range.cpp +++ b/llvm/lib/DebugInfo/GSYM/Range.cpp @@ -100,3 +100,15 @@ for (auto &Range : Ranges) Range.decode(Data, BaseAddr, Offset); } + +void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) { + 
Data.getULEB128(&Offset); + Data.getULEB128(&Offset); +} + +uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) { + uint64_t NumRanges = Data.getULEB128(&Offset); + for (uint64_t I=0; I using namespace llvm; @@ -1302,3 +1304,100 @@ "address 0x1030 not in GSYM"); } } + +TEST(GSYMTest, TestGsymLookups) { + // Test creating a GSYM file with a function that has a inline information. + // Verify that lookups work correctly. Lookups do not decode the entire + // FunctionInfo or InlineInfo, they only extract information needed for the + // lookup to happen which avoids allocations which can slow down + // symbolication. + GsymCreator GC; + FunctionInfo FI(0x1000, 0x100, GC.insertString("main")); + const auto ByteOrder = support::endian::system_endianness(); + FI.OptLineTable = LineTable(); + const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c"); + const uint32_t FooFileIndex = GC.insertFile("/tmp/foo.h"); + FI.OptLineTable->push(LineEntry(0x1000, MainFileIndex, 5)); + FI.OptLineTable->push(LineEntry(0x1010, FooFileIndex, 10)); + FI.OptLineTable->push(LineEntry(0x1012, FooFileIndex, 20)); + FI.OptLineTable->push(LineEntry(0x1014, FooFileIndex, 11)); + FI.OptLineTable->push(LineEntry(0x1016, FooFileIndex, 30)); + FI.OptLineTable->push(LineEntry(0x1018, FooFileIndex, 12)); + FI.OptLineTable->push(LineEntry(0x1020, MainFileIndex, 8)); + FI.Inline = InlineInfo(); + + FI.Inline->Name = GC.insertString("inline1"); + FI.Inline->CallFile = MainFileIndex; + FI.Inline->CallLine = 6; + FI.Inline->Ranges.insert(AddressRange(0x1010, 0x1020)); + InlineInfo Inline2; + Inline2.Name = GC.insertString("inline2"); + Inline2.CallFile = FooFileIndex; + Inline2.CallLine = 33; + Inline2.Ranges.insert(AddressRange(0x1012, 0x1014)); + FI.Inline->Children.emplace_back(Inline2); + InlineInfo Inline3; + Inline3.Name = GC.insertString("inline3"); + Inline3.CallFile = FooFileIndex; + Inline3.CallLine = 35; + Inline3.Ranges.insert(AddressRange(0x1016, 0x1018)); + 
FI.Inline->Children.emplace_back(Inline3); + GC.addFunctionInfo(std::move(FI)); + Error FinalizeErr = GC.finalize(llvm::nulls()); + ASSERT_FALSE(FinalizeErr); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = GC.encode(FW); + ASSERT_FALSE((bool)Err); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_TRUE(bool(GR)); + + // Verify inline info is correct when doing lookups. + auto LR = GR->lookup(0x1000); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5})); + LR = GR->lookup(0x100F); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5})); + + LR = GR->lookup(0x1010); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 10}, + SourceLocation{"main", "/tmp", "main.c", 6})); + + LR = GR->lookup(0x1012); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline2", "/tmp", "foo.h", 20}, + SourceLocation{"inline1", "/tmp", "foo.h", 33}, + SourceLocation{"main", "/tmp", "main.c", 6})); + + LR = GR->lookup(0x1014); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 11}, + SourceLocation{"main", "/tmp", "main.c", 6})); + + LR = GR->lookup(0x1016); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline3", "/tmp", "foo.h", 30}, + SourceLocation{"inline1", "/tmp", "foo.h", 35}, + SourceLocation{"main", "/tmp", "main.c", 6})); + + LR = GR->lookup(0x1018); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 12}, + SourceLocation{"main", "/tmp", "main.c", 6})); + + LR = 
GR->lookup(0x1020); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 8})); +}