diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -0,0 +1,112 @@ +//===- DwarfTransformer.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H +#define LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/Support/Error.h" + +namespace llvm { + +class raw_ostream; + +namespace gsym { + +struct CUInfo; +struct FunctionInfo; +class GsymCreator; + +class DwarfTransformer { +public: + + /// Create a DWARF transformer. + /// + /// \param D The DWARF to use when converting to GSYM. + /// + /// \param OS The stream to log warnings and non fatal issues to. + /// + /// \param G The GSYM creator to populate with the function information + /// from the debug info. + DwarfTransformer(DWARFContext &D, raw_ostream &OS, GsymCreator &G) : + DICtx(D), Log(OS), Gsym(G) {} + + /// Extract the DWARF from the supplied object file and convert it into the + /// Gsym format in the GsymCreator object that is passed in. Returns an + /// error if something fatal is encountered. + /// + /// The DWARF context, the log stream and the GSYM creator that were handed + /// to the constructor are used for the conversion. + /// + /// \param NumThreads The number of threads to use. When this is 1, all + /// compile units are converted on the calling thread. For any other value + /// the compile units are converted on a thread pool created with + /// NumThreads threads, and each unit's log output is buffered and then + /// appended to the log stream under a lock. 
+ /// + /// \returns An error indicating any fatal issues that happen when parsing + /// the DWARF, or Error::success() if all goes well. + llvm::Error convert(uint32_t NumThreads); + + llvm::Error verify(StringRef GsymPath); + + /// Set valid .text address ranges that all functions must be contained in. + /// + /// Any functions whose addresses do not exist within these function bounds + /// will not be converted into the final GSYM. This allows the object file + /// to figure out the valid file address ranges of all the code sections + /// and ensure we don't add invalid functions to the final output. Many + /// linkers have issues when dead stripping functions where they set the + /// DW_AT_low_pc to zero, but newer DWARF has the DW_AT_high_pc as an offset + /// from the DW_AT_low_pc and these size attributes have no relocations that + /// can be applied. This results in DWARF where many functions have a + /// DW_AT_low_pc of zero and a valid offset size for DW_AT_high_pc. If we + /// extract all valid ranges from an object file that are marked with + /// executable permissions, we can properly ensure that these functions are + /// removed. + void SetValidTextRanges(AddressRanges &TextRanges) { + ValidTextRanges = TextRanges; + } + +private: + + /// Parse the DWARF in the object file and convert it into the GsymCreator. + Error parse(); + + /// Handle any DIE (debug info entry) from the DWARF. + /// + /// This function will find all DW_TAG_subprogram DIEs and convert them into + /// GSYM FunctionInfo objects and add them to the GsymCreator supplied during + /// construction. The DIE and all its children will be recursively parsed + /// with calls to this function. + /// + /// \param Strm The thread specific log stream for any non fatal errors and + /// warnings. Once a thread has finished parsing an entire compile unit, all + /// information in this temporary stream will be forwarded to the member + /// variable Log. This keeps logging thread safe. 
+ /// + /// \param CUI The compile unit specific information that contains the DWARF + /// line table, cached file list, and other compile unit specific + /// information. + /// + /// \param Die The DWARF debug info entry to parse. + void handleDie(raw_ostream &Strm, CUInfo &CUI, DWARFDie Die); + + DWARFContext &DICtx; + raw_ostream &Log; + GsymCreator &Gsym; + Optional ValidTextRanges; + + friend class DwarfTransformerTest; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h --- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -16,6 +16,7 @@ #include #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" #include "llvm/DebugInfo/GSYM/Range.h" @@ -135,6 +136,7 @@ mutable std::recursive_mutex Mutex; std::vector Funcs; StringTableBuilder StrTab; + StringSet<> StringStorage; DenseMap FileEntryToIndex; std::vector Files; std::vector UUID; @@ -162,9 +164,12 @@ /// All strings used by GSYM files must be uniqued by adding them to this /// string pool and using the returned offset for any string values. /// - /// \param S The string to insert into the string table. + /// \param S The string to insert into the string table. + /// \param Copy If true, then make a backing copy of the string. If false, + /// the string is owned by another object that will stay around + /// long enough for the GsymCreator to save the GSYM file. /// \returns The unique 32 bit offset into the string table. - uint32_t insertString(StringRef S); + uint32_t insertString(StringRef S, bool Copy = true); /// Insert a file into this GSYM creator. 
/// @@ -221,6 +226,9 @@ void forEachFunctionInfo( std::function const &Callback) const; + /// Get the current number of FunctionInfo objects contained in this + /// object. + size_t getNumFunctionInfos() const; }; } // namespace gsym diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h --- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -147,7 +147,60 @@ return llvm::None; } -protected: + /// Dump the entire Gsym data contained in this object. + /// + /// \param OS The output stream to dump to. + void dump(raw_ostream &OS); + + /// Dump a FunctionInfo object. + /// + /// This function will convert any string table indexes and file indexes + /// into human readable format. + /// + /// \param OS The output stream to dump to. + /// + /// \param FI The object to dump. + void dump(raw_ostream &OS, const FunctionInfo &FI); + + /// Dump a LineTable object. + /// + /// This function will convert any string table indexes and file indexes + /// into human readable format. + /// + /// + /// \param OS The output stream to dump to. + /// + /// \param LT The object to dump. + void dump(raw_ostream &OS, const LineTable <); + + /// Dump an InlineInfo object. + /// + /// This function will convert any string table indexes and file indexes + /// into human readable format. + /// + /// \param OS The output stream to dump to. + /// + /// \param II The object to dump. + /// + /// \param Indent The indentation as number of spaces. Used for recursive + /// dumping. + void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0); + + /// Dump a FileEntry object. + /// + /// This function will convert any string table indexes into human readable + /// format. + /// + /// \param OS The output stream to dump to. + /// + /// \param FE The object to dump. + void dump(raw_ostream &OS, Optional FE); + + /// Get the number of addresses in this Gsym file. 
+ uint32_t getNumAddresses() const { + return Hdr->NumAddresses; + } + /// Gets an address from the address table. /// /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. @@ -157,6 +210,8 @@ /// or llvm::None if Index is out of bounds. Optional getAddress(size_t Index) const; +protected: + /// Get an appropriate address info offsets array. /// /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 diff --git a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h --- a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h @@ -166,6 +166,24 @@ llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const; bool empty() const { return Lines.empty(); } void clear() { Lines.clear(); } + /// Return the first line entry if the line table isn't empty. + /// + /// \returns An optional line entry with the first line entry if the line + /// table isn't empty, or llvm::None if the line table is empty. + Optional first() const { + if (Lines.empty()) + return llvm::None; + return Lines.front(); + } + /// Return the last line entry if the line table isn't empty. + /// + /// \returns An optional line entry with the last line entry if the line + /// table isn't empty, or llvm::None if the line table is empty. + Optional last() const { + if (Lines.empty()) + return llvm::None; + return Lines.back(); + } void push(const LineEntry &LE) { Lines.push_back(LE); } diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h --- a/llvm/include/llvm/MC/StringTableBuilder.h +++ b/llvm/include/llvm/MC/StringTableBuilder.h @@ -59,6 +59,16 @@ return getOffset(CachedHashStringRef(S)); } + /// Check if a string is contained in the string table. Since this class + /// doesn't store the string values, this function can be used to check if + /// storage needs to be done prior to adding the string. 
+ bool contains(StringRef S) const { + return contains(CachedHashStringRef(S)); + } + bool contains(CachedHashStringRef S) const { + return StringIndexMap.find(S) != StringIndexMap.end(); + } + size_t getSize() const { return Size; } void clear(); diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt --- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt +++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMDebugInfoGSYM + DwarfTransformer.cpp Header.cpp FileWriter.cpp FunctionInfo.cpp diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -0,0 +1,573 @@ +//===- DwarfTransformer.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" + +using namespace llvm; +using namespace gsym; + +struct llvm::gsym::CUInfo { + const DWARFDebugLine::LineTable *LineTable; + const char *CompDir; + std::vector FileCache; + uint64_t Language = 0; + uint8_t AddrSize = 0; + + CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { + LineTable = DICtx.getLineTableForUnit(CU); + CompDir = CU->getCompilationDir(); + FileCache.clear(); + if (LineTable) + 
+ FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); + DWARFDie Die = CU->getUnitDIE(); + Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); + AddrSize = CU->getAddressByteSize(); + } + + /// Return true if Addr is the highest address for a given compile unit. The + /// highest address is encoded as -1, or all ones in the address. These high + /// addresses are used by some linkers to indicate that a function has been + /// dead stripped or didn't end up in the linked executable. + bool isHighestAddress(uint64_t Addr) const { + if (AddrSize == 4) + return Addr == UINT32_MAX; + else if (AddrSize == 8) + return Addr == UINT64_MAX; + return false; + } + + /// Convert a DWARF compile unit file index into a GSYM global file index. + /// + /// Each compile unit in DWARF has its own file table in the line table + /// prologue. GSYM has a single large file table that applies to all files + /// from all of the info in a GSYM file. This function converts between the + /// two and caches any DWARF CU file index that has already been converted so + /// the first client that asks for a compile unit file index will end up + /// doing the conversion, and subsequent clients will get the cached GSYM + /// index. 
+ uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) { + // A missing line table or an out of range index both map to file zero. + if (!LineTable || DwarfFileIdx >= FileCache.size()) + return 0; + uint32_t GsymFileIdx = FileCache[DwarfFileIdx]; + if (GsymFileIdx != UINT32_MAX) + return GsymFileIdx; + std::string File; + if (LineTable->getFileNameByIndex( + DwarfFileIdx, CompDir, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) + GsymFileIdx = Gsym.insertFile(File); + else + GsymFileIdx = 0; + FileCache[DwarfFileIdx] = GsymFileIdx; + return GsymFileIdx; + } +}; + + +static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { + if (DWARFDie SpecDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { + if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) + return SpecParent; + } + if (DWARFDie AbstDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { + if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) + return AbstParent; + } + + // We never want to follow parent for inlined subroutine - that would + // give us information about where the function is inlined, not what + // function is inlined + if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) + return DWARFDie(); + + DWARFDie ParentDie = Die.getParent(); + if (!ParentDie) + return DWARFDie(); + + switch (ParentDie.getTag()) { + case dwarf::DW_TAG_namespace: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_subprogram: + return ParentDie; // Found parent decl context DIE + case dwarf::DW_TAG_lexical_block: + return GetParentDeclContextDIE(ParentDie); + default: + break; + } + + return DWARFDie(); +} + +/// Get the GsymCreator string table offset for the qualified name for the +/// DIE passed in. This function will avoid making copies of any strings in +/// the GsymCreator when possible. 
We don't need to copy a string when the +/// string comes from our .debug_str section or is an inlined string in the +/// .debug_info. If we create a qualified name string in this function by +/// combining multiple strings in the DWARF string table or info, we will make +/// a copy of the string when we add it to the string table. +static Optional getQualifiedNameIndex(DWARFDie &Die, + uint64_t Language, + GsymCreator &Gsym) { + // If the dwarf has mangled name, use mangled name + if (auto LinkageName = + dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr)) + return Gsym.insertString(LinkageName, /* Copy */ false); + + StringRef ShortName(Die.getName(DINameKind::ShortName)); + if (ShortName.empty()) + return llvm::None; + + // For C++ and ObjC, prepend names of all parent declaration contexts + if (!(Language == dwarf::DW_LANG_C_plus_plus || + Language == dwarf::DW_LANG_C_plus_plus_03 || + Language == dwarf::DW_LANG_C_plus_plus_11 || + Language == dwarf::DW_LANG_C_plus_plus_14 || + Language == dwarf::DW_LANG_ObjC_plus_plus || + // This should not be needed for C, but we see C++ code marked as C + // in some binaries. This should not hurt, so let's do it for C as well + Language == dwarf::DW_LANG_C)) + return Gsym.insertString(ShortName, /* Copy */ false); + + // Some GCC optimizations create functions with names ending with .isra. + // or .part. 
and those names are just DW_AT_name, not DW_AT_linkage_name + // If it looks like it could be the case, don't add any prefix + if (ShortName.startswith("_Z") && + (ShortName.contains(".isra.") || ShortName.contains(".part."))) + return Gsym.insertString(ShortName, /* Copy */ false); + + DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); + if (ParentDeclCtxDie) { + std::string Name = ShortName.str(); + while (ParentDeclCtxDie) { + StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); + if (!ParentName.empty()) { + // "lambda" names are wrapped in < >. Replace with { } + // to be consistent with demangled names and not to confuse with + // templates + if (ParentName.front() == '<' && ParentName.back() == '>') + Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + + "::" + Name; + else + Name = ParentName.str() + "::" + Name; + } + ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); + } + // Copy the name since we created a new name in a std::string. + return Gsym.insertString(Name, /* Copy */ true); + } + // Don't copy the name since it exists in the DWARF object file. + return Gsym.insertString(ShortName, /* Copy */ false); +} + +static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { + bool CheckChildren = true; + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: + // Don't look into functions within functions. 
+ CheckChildren = Depth == 0; + break; + case dwarf::DW_TAG_inlined_subroutine: + return true; + default: + break; + } + if (!CheckChildren) + return false; + for (DWARFDie ChildDie : Die.children()) { + if (hasInlineInfo(ChildDie, Depth + 1)) + return true; + } + return false; +} + +static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, + uint32_t Depth, FunctionInfo &FI, + InlineInfo &parent) { + if (!hasInlineInfo(Die, Depth)) + return; + + dwarf::Tag Tag = Die.getTag(); + if (Tag == dwarf::DW_TAG_inlined_subroutine) { + // create new InlineInfo and append to parent.children + InlineInfo II; + DWARFAddressRange FuncRange = + DWARFAddressRange(FI.startAddress(), FI.endAddress()); + Expected RangesOrError = Die.getAddressRanges(); + if (RangesOrError) { + for (const DWARFAddressRange &Range : RangesOrError.get()) { + // Check that the inlined function is within the range of the function + // info, it might not be in case of split functions + if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC) + II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC)); + } + } + if (II.Ranges.empty()) + return; + + if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) + II.Name = *NameIndex; + II.CallFile = CUI.DWARFToGSYMFileIndex( + Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); + II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); + // parse all children and append to parent + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II); + parent.Children.emplace_back(std::move(II)); + return; + } + if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { + // skip this Die and just recurse down + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent); + } +} + +static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI, + DWARFDie Die, GsymCreator &Gsym, + FunctionInfo &FI) { + 
+ std::vector RowVector; + const uint64_t StartAddress = FI.startAddress(); + const uint64_t EndAddress = FI.endAddress(); + const uint64_t RangeSize = EndAddress - StartAddress; + const object::SectionedAddress SecAddress{ + StartAddress, object::SectionedAddress::UndefSection}; + + + if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { + // If we have a DW_TAG_subprogram but no line entries, fall back to using + // the DW_AT_decl_file and DW_AT_decl_line if we have both attributes. + if (auto FileIdx = + dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) { + if (auto Line = + dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { + LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx), + *Line); + FI.OptLineTable = LineTable(); + FI.OptLineTable->push(LE); + // LE.Addr = EndAddress; + // FI.OptLineTable->push(LE); + } + } + return; + } + + FI.OptLineTable = LineTable(); + DWARFDebugLine::Row PrevRow; + for (uint32_t RowIndex : RowVector) { + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; + const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); + uint64_t RowAddress = Row.Address.Address; + // Watch out for a RowAddress that is in the middle of a line table entry + // in the DWARF. If we pass an address in between two line table entries + // we will get a RowIndex for the previous valid line table row which won't + // be contained in our function. This is usually a bug in the DWARF due to + // linker problems or LTO or other DWARF re-linking so it is worth emitting + // an error, but not worth stopping the creation of the GSYM. 
+ if (!FI.Range.contains(RowAddress)) { + if (RowAddress < FI.Range.Start) { + Log << "error: DIE has a start address whose LowPC is between the " + "line table Row[" << RowIndex << "] with address " + << HEX64(RowAddress) << " and the next one.\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + RowAddress = FI.Range.Start; + } else { + continue; + } + } + + LineEntry LE(RowAddress, FileIdx, Row.Line); + if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { + // We have seen full duplicate line tables for functions in some + // DWARF files. Watch for those here by checking that the last + // row was the function's end address (HighPC) and that the + // current line table entry's address is the same as the first + // line entry we already have in our "function_info.Lines". If + // so break out after printing a warning. + auto FirstLE = FI.OptLineTable->first(); + if (FirstLE && *FirstLE == LE) { + Log << "warning: duplicate line table detected for DIE:\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } else { + // Print out (ignore if os == nulls as this is expensive) + Log << "error: line table has addresses that do not " + << "monotonically increase:\n"; + for (uint32_t RowIndex2 : RowVector) { + CUI.LineTable->Rows[RowIndex2].dump(Log); + } + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + + // Skip multiple line entries for the same file and line. + auto LastLE = FI.OptLineTable->last(); + if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) + continue; + // Only push a row if it isn't an end sequence. End sequence markers are + // included for the last address in a function or the last contiguous + // address in a sequence. + if (Row.EndSequence) { + // End sequence means that the next line entry could have a lower address + // than the previous entries. So we clear the previous row so we don't + // trigger the line table error about addresses that do not monotonically + // increase. 
+ PrevRow = DWARFDebugLine::Row(); + } else { + FI.OptLineTable->push(LE); + PrevRow = Row; + } + } + // If no line table rows were added, clear the line table so we don't encode + // one in the GSYM file. + if (FI.OptLineTable->empty()) + FI.OptLineTable = llvm::None; +} + +void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: { + Expected RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) { + consumeError(RangesOrError.takeError()); + break; + } + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + if (Ranges.empty()) + break; + auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); + if (!NameIndex) { + OS << "error: function at " << HEX64(Die.getOffset()) + << " has no name\n "; + Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); + break; + } + + // Create a function_info for each range + for (const DWARFAddressRange &Range : Ranges) { + // The low PC must be less than the high PC. Many linkers don't remove + // DWARF for functions that don't get linked into the final executable. + // If both the high and low pc have relocations, linkers will often set + // the address values for both to the same value to indicate the function + // has been removed. Other linkers have been known to set one or both + // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 + // byte addresses to indicate the function isn't valid. The check below + // tries to watch for these cases and abort if it runs into them. + if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) + break; + + // Many linkers can't remove DWARF and might set the LowPC to zero. Since + // high PC can be an offset from the low PC in more recent DWARF versions + // we need to watch for a zero'ed low pc which we do using + // ValidTextRanges below. 
+ if (ValidTextRanges && !ValidTextRanges->contains(Range.LowPC)) { + // We expect zero and -1 to be invalid addresses in DWARF depending + // on the linker of the DWARF. This indicates a function was stripped + // and the debug info wasn't able to be stripped from the DWARF. If + // the LowPC isn't zero or -1, then we should emit an error. + if (Range.LowPC != 0) { + // Unexpected invalid address, emit an error + Log << "warning: DIE has an address range whose start address is " + "not in any executable sections (" << *ValidTextRanges << + ") and will not be processed:\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + + FunctionInfo FI; + FI.setStartAddress(Range.LowPC); + FI.setEndAddress(Range.HighPC); + FI.Name = *NameIndex; + if (CUI.LineTable) { + convertFunctionLineTable(OS, CUI, Die, Gsym, FI); + } + if (hasInlineInfo(Die, 0)) { + FI.Inline = InlineInfo(); + FI.Inline->Name = *NameIndex; + FI.Inline->Ranges.insert(FI.Range); + parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline); + } + Gsym.addFunctionInfo(std::move(FI)); + } + } break; + default: + break; + } + for (DWARFDie ChildDie : Die.children()) + handleDie(OS, CUI, ChildDie); +} + +Error DwarfTransformer::convert(uint32_t NumThreads) { + size_t NumBefore = Gsym.getNumFunctionInfos(); + if (NumThreads == 1) { + // Parse all DWARF data from this thread, use the same string/file table + // for everything + for (const auto &CU : DICtx.compile_units()) { + DWARFDie Die = CU->getUnitDIE(false); + CUInfo CUI(DICtx, dyn_cast(CU.get())); + handleDie(Log, CUI, Die); + } + } else { + // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up + // front before we start accessing any DIEs since there might be + // cross compile unit references in the DWARF. If we don't do this we can + // end up crashing. + + // We need to call getAbbreviations sequentially first so that getUnitDIE() + // only works with its local data. 
+ for (const auto &CU : DICtx.compile_units()) + CU->getAbbreviations(); + + // Now parse all DIEs in case we have cross compile unit references in a + // thread pool. + ThreadPool pool(NumThreads); + for (const auto &CU : DICtx.compile_units()) + pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); + pool.wait(); + + // Now convert all DWARF to GSYM in a thread pool. + std::mutex LogMutex; + for (const auto &CU : DICtx.compile_units()) { + DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); + if (Die) { + CUInfo CUI(DICtx, dyn_cast(CU.get())); + pool.async([this, CUI, &LogMutex, Die]() mutable { + std::string ThreadLogStorage; + raw_string_ostream ThreadOS(ThreadLogStorage); + handleDie(ThreadOS, CUI, Die); + ThreadOS.flush(); + if (!ThreadLogStorage.empty()) { + // Print ThreadLogStorage lines into an actual stream under a lock + std::lock_guard guard(LogMutex); + Log << ThreadLogStorage; + } + }); + } + } + pool.wait(); + } + size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; + return Error::success(); +} + +llvm::Error DwarfTransformer::verify(StringRef GsymPath) { + Log << "Verifying GSYM file \"" << GsymPath << "\":\n"; + + auto Gsym = GsymReader::openFile(GsymPath); + if (!Gsym) + return Gsym.takeError(); + + auto NumAddrs = Gsym->getNumAddresses(); + DILineInfoSpecifier DLIS( + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, + DILineInfoSpecifier::FunctionNameKind::LinkageName); + std::string gsymFilename; + for (uint32_t I = 0; I < NumAddrs; ++I) { + auto FuncAddr = Gsym->getAddress(I); + if (!FuncAddr) + return createStringError(std::errc::invalid_argument, + "failed to extract address[%i]", I); + + auto FI = Gsym->getFunctionInfo(*FuncAddr); + if (!FI) + return createStringError(std::errc::invalid_argument, + "failed to extract function info for address 0x%" + PRIu64, *FuncAddr); + + for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); 
++Addr) { + const object::SectionedAddress SectAddr{ + Addr, object::SectionedAddress::UndefSection}; + auto LR = Gsym->lookup(Addr); + if (!LR) + return LR.takeError(); + + auto DwarfInlineInfos = + DICtx.getInliningInfoForAddress(SectAddr, DLIS); + uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); + if (NumDwarfInlineInfos == 0) { + DwarfInlineInfos.addFrame( + DICtx.getLineInfoForAddress(SectAddr, DLIS)); + } + + // Check for 1 entry that has no file and line info + if (NumDwarfInlineInfos == 1 && + DwarfInlineInfos.getFrame(0).FileName == "") { + DwarfInlineInfos = DIInliningInfo(); + NumDwarfInlineInfos = 0; + } + if (NumDwarfInlineInfos > 0 && + NumDwarfInlineInfos != LR->Locations.size()) { + Log << "error: address " << HEX64(Addr) << " has " + << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " + << LR->Locations.size() << "\n"; + Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; + for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { + const auto dii = DwarfInlineInfos.getFrame(Idx); + Log << " [" << Idx << "]: " << dii.FunctionName << " @ " + << dii.FileName << ':' << dii.Line << '\n'; + } + Log << " " << LR->Locations.size() << " GSYM frames:\n"; + for (size_t Idx = 0, count = LR->Locations.size(); + Idx < count; ++Idx) { + const auto &gii = LR->Locations[Idx]; + Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir + << '/' << gii.Base << ':' << gii.Line << '\n'; + } + DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); + Gsym->dump(Log, *FI); + continue; + } + + for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; + ++Idx) { + const auto &gii = LR->Locations[Idx]; + if (Idx < NumDwarfInlineInfos) { + const auto dii = DwarfInlineInfos.getFrame(Idx); + gsymFilename = LR->getSourceFile(Idx); + // Verify function name + if (dii.FunctionName.find(gii.Name.str()) != 0) + Log << "error: address " << HEX64(Addr) << " DWARF function \"" + << dii.FunctionName.c_str() + << "\" doesn't match 
GSYM function \"" << gii.Name << "\"\n"; + // Verify source file path + if (dii.FileName != gsymFilename) + Log << "error: address " << HEX64(Addr) << " DWARF path \"" + << dii.FileName.c_str() << "\" doesn't match GSYM path \"" + << gsymFilename.c_str() << "\"\n"; + // Verify source file line + if (dii.Line != gii.Line) + Log << "error: address " << HEX64(Addr) << " DWARF line " + << dii.Line << " != GSYM line " << gii.Line << "\n"; + } + } + } + } + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -25,8 +25,11 @@ }; raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { - OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End) << "): " - << "Name=" << HEX32(FI.Name) << '\n' << FI.OptLineTable << FI.Inline; + OS << FI.Range << ": " << "Name=" << HEX32(FI.Name) << '\n'; + if (FI.OptLineTable) + OS << FI.OptLineTable << '\n'; + if (FI.Inline) + OS << FI.Inline << '\n'; return OS; } diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -203,9 +203,8 @@ // that have debug info are last in the sort. 
if (*Prev == *Curr) { // FunctionInfo entries match exactly (range, lines, inlines) - OS << "warning: duplicate function info entries, removing " - "duplicate:\n" - << *Curr << '\n'; + OS << "warning: duplicate function info entries for range: " + << Curr->Range << '\n'; Curr = Funcs.erase(Prev); } else { if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { @@ -244,10 +243,21 @@ return Error::success(); } -uint32_t GsymCreator::insertString(StringRef S) { - std::lock_guard Guard(Mutex); +uint32_t GsymCreator::insertString(StringRef S, bool Copy) { if (S.empty()) return 0; + std::lock_guard Guard(Mutex); + if (Copy) { + // We need to provide backing storage for the string if requested + // since StringTableBuilder stores references to strings. Any string + // that comes from a section in an object file doesn't need to be + // copied, but any string created by code will need to be copied. + // This allows GsymCreator to be really fast when parsing DWARF and + // other object files as most strings don't need to be copied. + CachedHashStringRef CHStr(S); + if (!StrTab.contains(CHStr)) + S = StringStorage.insert(S).first->getKey(); + } return StrTab.add(S); } @@ -273,3 +283,9 @@ break; } } + +size_t GsymCreator::getNumFunctionInfos() const{ + std::lock_guard Guard(Mutex); + return Funcs.size(); +} + diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -277,3 +277,117 @@ "failed to extract address[%" PRIu64 "]", *AddressIndex); } + +void GsymReader::dump(raw_ostream &OS) { + const auto &Header = getHeader(); + // Dump the GSYM header. + OS << Header << "\n"; + // Dump the address table. 
+ OS << "Address Table:\n"; + OS << "INDEX OFFSET"; + + switch (Hdr->AddrOffSize) { + case 1: OS << "8 "; break; + case 2: OS << "16"; break; + case 4: OS << "32"; break; + case 8: OS << "64"; break; + default: OS << "??"; break; + } + OS << " (ADDRESS)\n"; + OS << "====== =============================== \n"; + for (uint32_t I = 0; I < Header.NumAddresses; ++I) { + OS << format("[%4u] ", I); + switch (Hdr->AddrOffSize) { + case 1: OS << HEX8(getAddrOffsets()[I]); break; + case 2: OS << HEX16(getAddrOffsets()[I]); break; + case 4: OS << HEX32(getAddrOffsets()[I]); break; + case 8: OS << HEX32(getAddrOffsets()[I]); break; + default: break; + } + OS << " (" << HEX64(*getAddress(I)) << ")\n"; + } + // Dump the address info offsets table. + OS << "\nAddress Info Offsets:\n"; + OS << "INDEX Offset\n"; + OS << "====== ==========\n"; + for (uint32_t I = 0; I < Header.NumAddresses; ++I) + OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n"; + // Dump the file table. + OS << "\nFiles:\n"; + OS << "INDEX DIRECTORY BASENAME PATH\n"; + OS << "====== ========== ========== ==============================\n"; + for (uint32_t I = 0; I < Files.size(); ++I) { + OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' ' + << HEX32(Files[I].Base) << ' '; + dump(OS, getFile(I)); + OS << "\n"; + } + OS << "\n" << StrTab; + + for (uint32_t I = 0; I < Header.NumAddresses; ++I) { + OS << "\nFunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": "; + if (auto FI = getFunctionInfo(*getAddress(I))) + dump(OS, *FI); + else + logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:"); + } +} + +void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) { + OS << FI.Range << " \"" << getString(FI.Name) << "\"\n"; + if (FI.OptLineTable) + dump(OS, *FI.OptLineTable); + if (FI.Inline) + dump(OS, *FI.Inline); +} + +void GsymReader::dump(raw_ostream &OS, const LineTable <) { + OS << "LineTable:\n"; + for (auto &LE: LT) { + OS << " " << HEX64(LE.Addr) << ' '; + if (LE.File) + dump(OS, 
getFile(LE.File)); + OS << ':' << LE.Line << '\n'; + } +} + +void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) { + if (Indent == 0) + OS << "InlineInfo:\n"; + else + OS.indent(Indent); + OS << II.Ranges << ' ' << getString(II.Name); + if (II.CallFile != 0) { + if (auto File = getFile(II.CallFile)) { + OS << " called from "; + dump(OS, File); + OS << ':' << II.CallLine; + } + } + OS << '\n'; + for (const auto &ChildII: II.Children) + dump(OS, ChildII, Indent + 2); +} + +void GsymReader::dump(raw_ostream &OS, Optional FE) { + if (FE) { + // If we have the file from index 0, then don't print anything + if (FE->Dir == 0 && FE->Base == 0) + return; + StringRef Dir = getString(FE->Dir); + StringRef Base = getString(FE->Base); + if (!Dir.empty()) { + OS << Dir; + if (Dir.contains('\\') and not Dir.contains('/')) + OS << '\\'; + else + OS << '/'; + } + if (!Base.empty()) { + OS << Base; + } + if (!Dir.empty() || !Base.empty()) + return; + } + OS << ""; +} diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp --- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -142,13 +142,15 @@ return false; } - SourceLocation SrcLoc; - SrcLoc.Name = SrcLocs.back().Name; - SrcLoc.Dir = GR.getString(CallFile->Dir); - SrcLoc.Base = GR.getString(CallFile->Base); - SrcLoc.Line = Inline.CallLine; - SrcLocs.back().Name = GR.getString(Inline.Name); - SrcLocs.push_back(SrcLoc); + if (CallFile->Dir || CallFile->Base) { + SourceLocation SrcLoc; + SrcLoc.Name = SrcLocs.back().Name; + SrcLoc.Dir = GR.getString(CallFile->Dir); + SrcLoc.Base = GR.getString(CallFile->Base); + SrcLoc.Line = Inline.CallLine; + SrcLocs.back().Name = GR.getString(Inline.Name); + SrcLocs.push_back(SrcLoc); + } return true; } diff --git a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt --- a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt +++ 
b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt @@ -1,7 +1,9 @@ set(LLVM_LINK_COMPONENTS + DebugInfoDWARF DebugInfoGSYM MC Support + ObjectYAML ) add_llvm_unittest(DebugInfoGSYMTests diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -9,6 +9,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" #include "llvm/DebugInfo/GSYM/Header.h" #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" @@ -18,6 +20,7 @@ #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/ObjectYAML/DWARFEmitter.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Endian.h" #include "llvm/Testing/Support/Error.h" @@ -1399,3 +1402,1150 @@ EXPECT_THAT(LR->Locations, testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 8})); } + + +TEST(GSYMTest, TestDWARFFunctionWithAddresses) { + // Create a single compile unit with a single function and make sure it gets + // converted to GSYM correctly. The function's address range is one where + // DW_AT_low_pc and DW_AT_high_pc are both addresses. 
+ StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + - main + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + debug_info: + - Length: + TotalLength: 52 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000002000 + - Value: 0x0000000000000004 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000001000 + - Value: 0x0000000000002000 + - AbbrCode: 0x00000000 + Values: + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, OS, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + // There should only be one function in our GSYM. 
+ EXPECT_EQ(GR->getNumAddresses(), 1u); + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + EXPECT_FALSE(ExpFI->OptLineTable.hasValue()); + EXPECT_FALSE(ExpFI->Inline.hasValue()); +} + +TEST(GSYMTest, TestDWARFFunctionWithAddressAndOffset) { + // Create a single compile unit with a single function and make sure it gets + // converted to GSYM correctly. The function's address range is one where + // DW_AT_low_pc is an address and the DW_AT_high_pc is an offset. + StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + - main + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + debug_info: + - Length: + TotalLength: 44 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000004 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - AbbrCode: 0x00000000 + Values: + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, OS, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), 
Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + // There should only be one function in our GSYM. + EXPECT_EQ(GR->getNumAddresses(), 1u); + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + EXPECT_FALSE(ExpFI->OptLineTable.hasValue()); + EXPECT_FALSE(ExpFI->Inline.hasValue()); +} + +TEST(GSYMTest, TestDWARFStructMethodNoMangled) { + // Sometimes the compiler will omit the mangled name in the DWARF for static + // and member functions of classes and structs. This test verifies that the + // fully qualified name of the method is computed and used as the string for + // the function in the GSYM in these cases. Otherwise we might just get a + // function name like "erase" instead of "std::vector::erase". 
+ StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + - Foo + - dump + - this + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x00000002 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Code: 0x00000003 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + - Code: 0x00000004 + Tag: DW_TAG_formal_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_artificial + Form: DW_FORM_flag_present + debug_info: + - Length: + TotalLength: 68 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000002000 + - Value: 0x0000000000000004 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - AbbrCode: 0x00000003 + Values: + - Value: 0x0000000000000011 + - Value: 0x0000000000001000 + - Value: 0x0000000000002000 + - AbbrCode: 0x00000004 + Values: + - Value: 0x0000000000000016 + - Value: 0x0000000000000022 + - Value: 0x0000000000000001 + - AbbrCode: 0x00000000 + Values: + - AbbrCode: 0x00000000 + Values: + - AbbrCode: 0x00000000 + Values: + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer 
DT(*DwarfContext, OS, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + // There should only be one function in our GSYM. + EXPECT_EQ(GR->getNumAddresses(), 1u); + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + EXPECT_FALSE(ExpFI->OptLineTable.hasValue()); + EXPECT_FALSE(ExpFI->Inline.hasValue()); + StringRef MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "Foo::dump"); +} + +TEST(GSYMTest, TestDWARFTextRanges) { + // Linkers don't understand DWARF, they just like to concatenate and + // relocate data within the DWARF sections. This means that if a function + // gets dead stripped, and if those functions use an offset as the + // DW_AT_high_pc, we can end up with many functions at address zero. The + // DwarfTransformer allows clients to specify valid .text address ranges + // and any addresses of any functions must fall within those ranges if any + // have been specified. This means that an object file can calculate the + // address ranges within the binary where code lives and set these ranges + // as constraints in the DwarfTransformer. ObjectFile instances can + // add the address ranges of sections that have executable permissions. This + // keeps bad information from being added to a GSYM file and causing issues + // when symbolicating. 
+ StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + - main + - dead_stripped + - dead_stripped2 + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + debug_info: + - Length: + TotalLength: 78 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000004 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - AbbrCode: 0x00000002 + Values: + - Value: 0x0000000000000012 + - Value: 0x0000000000000000 + - Value: 0x0000000000000100 + - AbbrCode: 0x00000002 + Values: + - Value: 0x0000000000000020 + - Value: 0x0000000000000000 + - Value: 0x0000000000000040 + - AbbrCode: 0x00000000 + Values: + + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, OS, GC); + // Only allow addresses between [0x1000 - 0x2000) to be linked into the + // GSYM. 
+ AddressRanges TextRanges; + TextRanges.insert(AddressRange(0x1000, 0x2000)); + DT.SetValidTextRanges(TextRanges); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + // There should only be one function in our GSYM. + EXPECT_EQ(GR->getNumAddresses(), 1u); + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + EXPECT_FALSE(ExpFI->OptLineTable.hasValue()); + EXPECT_FALSE(ExpFI->Inline.hasValue()); + StringRef MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "main"); +} + +TEST(GSYMTest, TestDWARFInlineInfo) { + // Make sure we parse the line table and inline information correctly from + // DWARF. 
+ StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + - main + - inline1 + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Code: 0x00000003 + Tag: DW_TAG_inlined_subroutine + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_call_file + Form: DW_FORM_data4 + - Attribute: DW_AT_call_line + Form: DW_FORM_data4 + debug_info: + - Length: + TotalLength: 74 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000004 + - Value: 0x0000000000000000 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - AbbrCode: 0x00000003 + Values: + - Value: 0x0000000000000012 + - Value: 0x0000000000001100 + - Value: 0x0000000000000100 + - Value: 0x0000000000000001 + - Value: 0x000000000000000A + - AbbrCode: 0x00000000 + Values: + - AbbrCode: 0x00000000 + Values: + debug_line: + - Length: + TotalLength: 96 + Version: 2 + PrologueLength: 46 + MinInstLength: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + IncludeDirs: + - /tmp + Files: + - Name: main.c + DirIdx: 1 + ModTime: 0 + 
Length: 0 + - Name: inline.h + DirIdx: 1 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4096 + - Opcode: DW_LNS_advance_line + SData: 9 + Data: 4096 + - Opcode: DW_LNS_copy + Data: 4096 + - Opcode: DW_LNS_advance_pc + Data: 256 + - Opcode: DW_LNS_set_file + Data: 2 + - Opcode: DW_LNS_advance_line + SData: 10 + Data: 2 + - Opcode: DW_LNS_copy + Data: 2 + - Opcode: DW_LNS_advance_pc + Data: 128 + - Opcode: DW_LNS_advance_line + SData: 1 + Data: 128 + - Opcode: DW_LNS_copy + Data: 128 + - Opcode: DW_LNS_advance_pc + Data: 128 + - Opcode: DW_LNS_set_file + Data: 1 + - Opcode: DW_LNS_advance_line + SData: -10 + Data: 1 + - Opcode: DW_LNS_copy + Data: 1 + - Opcode: DW_LNS_advance_pc + Data: 3584 + - Opcode: DW_LNS_advance_line + SData: 1 + Data: 3584 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 3584 + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, OS, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + // There should only be one function in our GSYM. 
+ EXPECT_EQ(GR->getNumAddresses(), 1u); + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + EXPECT_TRUE(ExpFI->OptLineTable.hasValue()); + EXPECT_TRUE(ExpFI->Inline.hasValue()); + StringRef MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "main"); + + // Verify inline info is correct when doing lookups. + auto LR = GR->lookup(0x1000); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 10})); + LR = GR->lookup(0x1100-1); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 10})); + + LR = GR->lookup(0x1100); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline1", "/tmp", "inline.h", 20}, + SourceLocation{"main", "/tmp", "main.c", 10})); + LR = GR->lookup(0x1180-1); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline1", "/tmp", "inline.h", 20}, + SourceLocation{"main", "/tmp", "main.c", 10})); + LR = GR->lookup(0x1180); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline1", "/tmp", "inline.h", 21}, + SourceLocation{"main", "/tmp", "main.c", 10})); + LR = GR->lookup(0x1200-1); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"inline1", "/tmp", "inline.h", 21}, + SourceLocation{"main", "/tmp", "main.c", 10})); + LR = GR->lookup(0x1200); + ASSERT_THAT_EXPECTED(LR, Succeeded()); + EXPECT_THAT(LR->Locations, + testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 11})); +} + + +TEST(GSYMTest, TestDWARFNoLines) { + // Check that if a DW_TAG_subprogram doesn't have line table entries that + // we fall back and use the DW_AT_decl_file and 
DW_AT_decl_line to at least + // point to the function definition. This DWARF file has 4 functions: + // "lines_no_decl": has line table entries, no DW_AT_decl_file/line attrs. + // "lines_with_decl": has line table entries and has DW_AT_decl_file/line, + // make sure we don't use DW_AT_decl_file/line and make + // sure there is a line table. + // "no_lines_no_decl": no line table entries and no DW_AT_decl_file/line, + // make sure there is no line table for this function. + // "no_lines_with_decl": no line table and has DW_AT_decl_file/line, make + // sure we have one line table entry that starts at + // the function start address and the decl file and + // line. + // + // 0x0000000b: DW_TAG_compile_unit + // DW_AT_name ("/tmp/main.c") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000002000) + // DW_AT_language (DW_LANG_C_plus_plus) + // DW_AT_stmt_list (0x00000000) + // + // 0x00000022: DW_TAG_subprogram + // DW_AT_name ("lines_no_decl") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000002000) + // + // 0x00000033: DW_TAG_subprogram + // DW_AT_name ("lines_with_decl") + // DW_AT_low_pc (0x0000000000002000) + // DW_AT_high_pc (0x0000000000003000) + // DW_AT_decl_file ("/tmp/main.c") + // DW_AT_decl_line (20) + // + // 0x00000046: DW_TAG_subprogram + // DW_AT_name ("no_lines_no_decl") + // DW_AT_low_pc (0x0000000000003000) + // DW_AT_high_pc (0x0000000000004000) + // + // 0x00000057: DW_TAG_subprogram + // DW_AT_name ("no_lines_with_decl") + // DW_AT_low_pc (0x0000000000004000) + // DW_AT_high_pc (0x0000000000005000) + // DW_AT_decl_file ("/tmp/main.c") + // DW_AT_decl_line (40) + // + // 0x0000006a: NULL + + StringRef yamldata = R"( + debug_str: + - '' + - '/tmp/main.c' + - lines_no_decl + - lines_with_decl + - no_lines_no_decl + - no_lines_with_decl + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: 
DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Code: 0x00000003 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + debug_info: + - Length: + TotalLength: 103 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000004 + - Value: 0x0000000000000000 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - AbbrCode: 0x00000003 + Values: + - Value: 0x000000000000001B + - Value: 0x0000000000002000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000001 + - Value: 0x0000000000000014 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000002B + - Value: 0x0000000000003000 + - Value: 0x0000000000001000 + - AbbrCode: 0x00000003 + Values: + - Value: 0x000000000000003C + - Value: 0x0000000000004000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000001 + - Value: 0x0000000000000028 + - AbbrCode: 0x00000000 + Values: [] + debug_line: + - Length: + TotalLength: 92 + Version: 2 + PrologueLength: 34 + MinInstLength: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + IncludeDirs: + - '/tmp' + Files: + - Name: main.c + 
DirIdx: 1 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4096 + - Opcode: DW_LNS_advance_line + SData: 10 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 512 + - Opcode: DW_LNS_advance_line + SData: 1 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 3584 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 0 + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 8192 + - Opcode: DW_LNS_advance_line + SData: 20 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 512 + - Opcode: DW_LNS_advance_line + SData: 1 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 3584 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 0 + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, OS, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + + EXPECT_EQ(GR->getNumAddresses(), 4u); + + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + EXPECT_TRUE(ExpFI->OptLineTable.hasValue()); + StringRef MethodName = GR->getString(ExpFI->Name); + 
EXPECT_EQ(MethodName, "lines_no_decl"); + // Make sure we have two line table entries and that we get the first line entry + // correct. + EXPECT_EQ(ExpFI->OptLineTable->size(), 2u); + EXPECT_EQ(ExpFI->OptLineTable->first()->Addr, 0x1000u); + EXPECT_EQ(ExpFI->OptLineTable->first()->Line, 11u); + + ExpFI = GR->getFunctionInfo(0x2000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x2000, 0x3000)); + EXPECT_TRUE(ExpFI->OptLineTable.hasValue()); + MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "lines_with_decl"); + // Make sure we have two line table entries and that we don't use line 20 + // from the DW_AT_decl_file/line as a line table entry. + EXPECT_EQ(ExpFI->OptLineTable->size(), 2u); + EXPECT_EQ(ExpFI->OptLineTable->first()->Addr, 0x2000u); + EXPECT_EQ(ExpFI->OptLineTable->first()->Line, 21u); + + ExpFI = GR->getFunctionInfo(0x3000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x3000, 0x4000)); + // Make sure we have no line table. + EXPECT_FALSE(ExpFI->OptLineTable.hasValue()); + MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "no_lines_no_decl"); + + ExpFI = GR->getFunctionInfo(0x4000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x4000, 0x5000)); + EXPECT_TRUE(ExpFI->OptLineTable.hasValue()); + MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "no_lines_with_decl"); + // Make sure we have one line table entry that uses the DW_AT_decl_file/line + // as the one and only line entry. + EXPECT_EQ(ExpFI->OptLineTable->size(), 1u); + EXPECT_EQ(ExpFI->OptLineTable->first()->Addr, 0x4000u); + EXPECT_EQ(ExpFI->OptLineTable->first()->Line, 40u); +} + + +TEST(GSYMTest, TestDWARFDeadStripAddr4) { + // Check that various techniques that compilers use for dead code stripping + // work for 4 byte addresses. Make sure we keep the good functions and + // strip any functions whose name starts with "stripped". 
+ // + // 1 - Compilers might set the low PC to -1 (UINT32_MAX) for a compile unit + // with 4 byte addresses ("stripped1") + // 2 - Set the low and high PC to the same value ("stripped2") + // 3 - Have the high PC lower than the low PC ("stripped3") + // + // "main" and "stripped1" use abbreviation code 2, where DW_AT_high_pc is a + // DW_FORM_data4 offset from DW_AT_low_pc. "stripped2" and "stripped3" use + // abbreviation code 3, where DW_AT_high_pc is a DW_FORM_addr address. + // + // 0x0000000b: DW_TAG_compile_unit + // DW_AT_name ("/tmp/main.c") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000002000) + // DW_AT_language (DW_LANG_C_plus_plus) + // + // 0x0000001a: DW_TAG_subprogram + // DW_AT_name ("main") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000002000) + // + // 0x00000027: DW_TAG_subprogram + // DW_AT_name ("stripped1") + // DW_AT_low_pc (0x00000000ffffffff) + // DW_AT_high_pc (0x0000000100000000) + // + // 0x00000034: DW_TAG_subprogram + // DW_AT_name ("stripped2") + // DW_AT_low_pc (0x0000000000003000) + // DW_AT_high_pc (0x0000000000003000) + // + // 0x00000041: DW_TAG_subprogram + // DW_AT_name ("stripped3") + // DW_AT_low_pc (0x0000000000004000) + // DW_AT_high_pc (0x0000000000003fff) + // + // 0x0000004e: NULL + + + StringRef yamldata = R"( + debug_str: + - '' + - '/tmp/main.c' + - main + - stripped1 + - stripped2 + - stripped3 + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Code: 0x00000003 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + debug_info: + - Length: + TotalLength: 75 +
Version: 4 + AbbrOffset: 0 + AddrSize: 4 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000004 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - AbbrCode: 0x00000002 + Values: + - Value: 0x0000000000000012 + - Value: 0x00000000FFFFFFFF + - Value: 0x0000000000000001 + - AbbrCode: 0x00000003 + Values: + - Value: 0x000000000000001C + - Value: 0x0000000000003000 + - Value: 0x0000000000003000 + - AbbrCode: 0x00000003 + Values: + - Value: 0x0000000000000026 + - Value: 0x0000000000004000 + - Value: 0x0000000000003FFF + - AbbrCode: 0x00000000 + Values: [] + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 4); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, OS, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + // Encode the GSYM to an in-memory buffer and read it back, so the checks + // below run against the serialized data. + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + + // Test that the only function that made it was the "main" function.
+ EXPECT_EQ(GR->getNumAddresses(), 1u); + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + StringRef MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "main"); +} + +TEST(GSYMTest, TestDWARFDeadStripAddr8) { + // Check that various techniques that compilers use for dead code stripping + // work for 8 byte addresses. Make sure we keep the good functions and + // strip any functions whose name starts with "stripped". + // + // 1 - Compilers might set the low PC to -1 (UINT64_MAX) for a compile unit + // with 8 byte addresses ("stripped1") + // 2 - Set the low and high PC to the same value ("stripped2") + // 3 - Have the high PC lower than the low PC ("stripped3") + // + // "main" and "stripped1" use abbreviation code 2, where DW_AT_high_pc is a + // DW_FORM_data4 offset from DW_AT_low_pc. "stripped2" and "stripped3" use + // abbreviation code 3, where DW_AT_high_pc is a DW_FORM_addr address. + // + // 0x0000000b: DW_TAG_compile_unit + // DW_AT_name ("/tmp/main.c") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000002000) + // DW_AT_language (DW_LANG_C_plus_plus) + // + // 0x0000001e: DW_TAG_subprogram + // DW_AT_name ("main") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000002000) + // + // 0x0000002f: DW_TAG_subprogram + // DW_AT_name ("stripped1") + // DW_AT_low_pc (0xffffffffffffffff) + // DW_AT_high_pc (0x0000000000000000) + // + // 0x00000040: DW_TAG_subprogram + // DW_AT_name ("stripped2") + // DW_AT_low_pc (0x0000000000003000) + // DW_AT_high_pc (0x0000000000003000) + // + // 0x00000055: DW_TAG_subprogram + // DW_AT_name ("stripped3") + // DW_AT_low_pc (0x0000000000004000) + // DW_AT_high_pc (0x0000000000003fff) + // + // 0x0000006a: NULL + + StringRef yamldata = R"( + debug_str: + - '' + - '/tmp/main.c' + - main + - stripped1 + - stripped2 + - stripped3 + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_language +
Form: DW_FORM_data2 + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Code: 0x00000003 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + debug_info: + - Length: + TotalLength: 103 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - Value: 0x0000000000000004 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000001000 + - Value: 0x0000000000001000 + - AbbrCode: 0x00000002 + Values: + - Value: 0x0000000000000012 + - Value: 0xFFFFFFFFFFFFFFFF + - Value: 0x0000000000000001 + - AbbrCode: 0x00000003 + Values: + - Value: 0x000000000000001C + - Value: 0x0000000000003000 + - Value: 0x0000000000003000 + - AbbrCode: 0x00000003 + Values: + - Value: 0x0000000000000026 + - Value: 0x0000000000004000 + - Value: 0x0000000000003FFF + - AbbrCode: 0x00000000 + Values: [] + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + auto &OS = llvm::nulls(); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, OS, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded()); + // Encode the GSYM to an in-memory buffer and read it back, so the checks + // below run against the serialized data. + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + const auto ByteOrder = support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + ASSERT_THAT_ERROR(GC.encode(FW), Succeeded()); + Expected GR =
GsymReader::copyBuffer(OutStrm.str()); + ASSERT_THAT_EXPECTED(GR, Succeeded()); + + // Test that the only function that made it was the "main" function. + EXPECT_EQ(GR->getNumAddresses(), 1u); + auto ExpFI = GR->getFunctionInfo(0x1000); + ASSERT_THAT_EXPECTED(ExpFI, Succeeded()); + ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x2000)); + StringRef MethodName = GR->getString(ExpFI->Name); + EXPECT_EQ(MethodName, "main"); +}