Index: ELF/CMakeLists.txt =================================================================== --- ELF/CMakeLists.txt +++ ELF/CMakeLists.txt @@ -17,6 +17,7 @@ OutputSections.cpp Relocations.cpp ScriptParser.cpp + SplitDebugInfo.cpp Strings.cpp SymbolListFile.cpp SymbolTable.cpp Index: ELF/Config.h =================================================================== --- ELF/Config.h +++ ELF/Config.h @@ -92,6 +92,7 @@ bool EnableNewDtags; bool ExportDynamic; bool FatalWarnings; + bool GdbIndex; bool GcSections; bool GnuHash = false; bool ICF; Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -401,6 +401,7 @@ Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings); Config->GcSections = getArg(Args, OPT_gc_sections, OPT_no_gc_sections, false); + Config->GdbIndex = Args.hasArg(OPT_gdb_index); Config->ICF = Args.hasArg(OPT_icf); Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique); Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version); Index: ELF/InputFiles.h =================================================================== --- ELF/InputFiles.h +++ ELF/InputFiles.h @@ -131,6 +131,7 @@ void parse(llvm::DenseSet &ComdatGroups); ArrayRef *> getSections() const { return Sections; } + InputSectionBase *getSection(uint32_t Index) const; InputSectionBase *getSection(const Elf_Sym &Sym) const; SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -344,8 +344,7 @@ template InputSectionBase * -elf::ObjectFile::getSection(const Elf_Sym &Sym) const { - uint32_t Index = this->getSectionIndex(Sym); +elf::ObjectFile::getSection(uint32_t Index) const { if (Index == 0) return nullptr; if (Index >= Sections.size()) @@ -362,6 +361,12 @@ } template +InputSectionBase * 
+elf::ObjectFile::getSection(const Elf_Sym &Sym) const { + return getSection(this->getSectionIndex(Sym)); +} + +template SymbolBody *elf::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { int Binding = Sym->getBinding(); InputSectionBase *Sec = getSection(*Sym); Index: ELF/Options.td =================================================================== --- ELF/Options.td +++ ELF/Options.td @@ -86,6 +86,9 @@ def image_base : J<"image-base=">, HelpText<"Set the base address">; +def gdb_index: F<"gdb-index">, + HelpText<"Generate .gdb_index section">; + def init: S<"init">, MetaVarName<"">, HelpText<"Specify an initializer function">; Index: ELF/OutputSections.h =================================================================== --- ELF/OutputSections.h +++ ELF/OutputSections.h @@ -12,6 +12,7 @@ #include "Config.h" #include "Relocations.h" +#include "SplitDebugInfo.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/SmallPtrSet.h" @@ -118,6 +119,52 @@ Elf_Shdr Header; }; +template +class GdbIndexSection final : public OutputSectionBase { + const unsigned OffsetTypeSize = 4; + const unsigned CompilationUnitSize = 16; + const unsigned AddressEntrySize = 16 + OffsetTypeSize; + const unsigned SymTabEntrySize = 2 * OffsetTypeSize; + +public: + GdbIndexSection(); + void finalize() override; + void writeTo(uint8_t *Buf) override; + + // Stores pointer to .debug_info output section. + OutputSectionBase *DebugInfoSec = nullptr; + + // Pairs of [CU Offset, CU length]. + std::vector> CompilationUnits; + + struct AddressEntry { + InputSectionBase *Section; // Section. + uint64_t LowAddress; // The low address. + uint64_t HighAddress; // The high address. + uint32_t CuIndex; // The CU index. + }; + std::vector AddressArea; + + llvm::StringTableBuilder StringPool; + + GdbHashTab SymbolTable; + + // The CU vector portion of the constant pool. 
+ std::vector>> CuVectors; + +private: + bool parseDebugSections(); + + uint32_t CuTypesOffset; + uint32_t CuListOffset; + uint32_t SymTabOffset; + uint32_t ConstantPoolOffset; + uint32_t StringPoolOffset; + + size_t CuVectorsSize = 0; + std::vector CuVectorsOffset; +}; + template class GotSection final : public OutputSectionBase { typedef OutputSectionBase Base; typedef typename ELFT::uint uintX_t; @@ -751,6 +798,7 @@ static DynamicSection *Dynamic; static EhFrameHeader *EhFrameHdr; static EhOutputSection *EhFrame; + static GdbIndexSection *GdbIndex; static GnuHashTableSection *GnuHashTab; static GotPltSection *GotPlt; static GotSection *Got; @@ -814,6 +862,7 @@ template DynamicSection *Out::Dynamic; template EhFrameHeader *Out::EhFrameHdr; template EhOutputSection *Out::EhFrame; +template GdbIndexSection *Out::GdbIndex; template GnuHashTableSection *Out::GnuHashTab; template GotPltSection *Out::GotPlt; template GotSection *Out::Got; Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -11,6 +11,7 @@ #include "Config.h" #include "EhFrame.h" #include "LinkerScript.h" +#include "SplitDebugInfo.h" #include "Strings.h" #include "SymbolTable.h" #include "Target.h" @@ -56,6 +57,117 @@ } template +GdbIndexSection::GdbIndexSection() + : OutputSectionBase(".gdb_index", SHT_PROGBITS, 0), + StringPool(llvm::StringTableBuilder::RAW) {} + +template bool GdbIndexSection::parseDebugSections() { + if (!DebugInfoSec) { + error(".debug_info is required for building .gdb_index"); + return false; + } + std::vector *> &IS = + static_cast *>(DebugInfoSec)->Sections; + for (InputSection *I : IS) + DwarfInfoReader(*this, I).addToGdbIndex(); + return true; +} + +template void GdbIndexSection::finalize() { + if (!parseDebugSections()) + return; + + // GdbIndex header consist from version fields + // and 5 more fields with different kinds of offsets. 
+ CuListOffset = 6 * OffsetTypeSize; + + CuTypesOffset = CuListOffset + CompilationUnits.size() * CompilationUnitSize; + SymTabOffset = CuTypesOffset + AddressArea.size() * AddressEntrySize; + ConstantPoolOffset = + SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; + + for (std::vector> &CuVec : CuVectors) { + CuVectorsOffset.push_back(CuVectorsSize); + CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); + } + StringPoolOffset = ConstantPoolOffset + CuVectorsSize; + + StringPool.finalizeInOrder(); + this->Header.sh_size = StringPoolOffset + StringPool.getSize(); +} + +template void GdbIndexSection::writeTo(uint8_t *Buf) { + // Write version. + write32le(Buf, 7); + Buf += 4; + + write32le(Buf, CuListOffset); + Buf += 4; + + // Offset of the types CU list. + write32le(Buf, CuTypesOffset); + Buf += 4; + + // Offset of the address area, the same as offset of the types CU list, + // as we dont support types CU lists yet, so it is empty. + write32le(Buf, CuTypesOffset); + Buf += 4; + + // Offset of the symbol table. + write32le(Buf, SymTabOffset); + Buf += 4; + + //Offset of the constant pool. + write32le(Buf, ConstantPoolOffset); + Buf += 4; + + // Write the CU list. + for (std::pair& CU : CompilationUnits) { + write64le(Buf, CU.first); + write64le(Buf + 8, CU.second); + Buf += 16; + } + + // Write the address area. + for (AddressEntry &E : AddressArea) { + uintX_t BaseAddr = E.Section->OutSec->getVA() + E.Section->getOffset(0); + write64le(Buf, BaseAddr + E.LowAddress); + write64le(Buf + 8, BaseAddr + E.HighAddress); + write32le(Buf + 16, E.CuIndex); + Buf += 20; + } + + // Write the symbol table. 
+ for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) { + GdbSymbol *Sym = SymbolTable.getSymbol(I); + if (Sym) { + size_t NameOffset = + Sym->NameOffset + StringPoolOffset - ConstantPoolOffset; + size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex]; + write32le(Buf, NameOffset); + write32le(Buf + 4, CuVectorOffset); + } + Buf += 8; + } + + // Write the CU vectors into the constant pool. + for (std::vector> &CuVec : CuVectors) { + write32le(Buf, CuVec.size()); + Buf += 4; + for (std::pair &P : CuVec) { + uint32_t Index = P.first; + uint8_t Flags = P.second; + Index |= Flags << 24; + write32le(Buf, Index); + Buf += 4; + } + } + + StringRef Strings = StringPool.data(); + memcpy(Buf, Strings.data(), Strings.size()); +} + +template GotPltSection::GotPltSection() : OutputSectionBase(".got.plt", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE) { this->Header.sh_addralign = Target->GotPltEntrySize; @@ -2047,6 +2159,11 @@ template class BuildIdHexstring; template class BuildIdHexstring; +template class GdbIndexSection; +template class GdbIndexSection; +template class GdbIndexSection; +template class GdbIndexSection; + template class OutputSectionFactory; template class OutputSectionFactory; template class OutputSectionFactory; Index: ELF/SplitDebugInfo.h =================================================================== --- ELF/SplitDebugInfo.h +++ ELF/SplitDebugInfo.h @@ -0,0 +1,212 @@ +//===- SplitDebugInfo.h --------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===-------------------------------------------------------------------===// + +#ifndef LLD_ELF_SPLIT_DEBUG_INFO_H +#define LLD_ELF_SPLIT_DEBUG_INFO_H + +#include "InputFiles.h" +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf { + +template class DwarfDieParser; +template class GdbIndexSection; +template class InputSection; + +// Helper class for extracting relocations data. +template class RelocMapper { + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Sym Elf_Sym; + + InputSection *Sec; + + const Elf_Shdr &RelSec; + const llvm::object::ELFFile &EObj; + + size_t Position = 0; + + uintX_t getNextRelocAddend(); + uintX_t getNextRelocOffset(); + uintX_t getNextRelocSymbolIndex(); + + template void advance(ArrayRef Rel, uintX_t Offset); + void advanceToOffset(uintX_t Offset); + +public: + RelocMapper(InputSection *Sec); + uintX_t lookupReloc(uintX_t RelOffset, uintX_t *TargetOffset); +}; + +struct AbbreviationCode { + AbbreviationCode(uint64_t Tag) : Tag(Tag){}; + uint64_t Tag; + std::vector> Attributes; // [Attr, Form] +}; + +// The abbreviations tables for all compilation units are contained in a +// separate object file section called ".debug_abbrev". +// Defines the abbreviation codes used by the skeleton .debug_info section. +template class DwarfAbbrevTable { + typedef typename ELFT::uint uintX_t; + typedef typename ELFT::Shdr Elf_Shdr; + +public: + void read(elf::ObjectFile *File, uintX_t Shndx, uintX_t Offset); + + AbbreviationCode *getEntry(uint32_t Code); + +protected: + ArrayRef Data; + uint32_t AbbrevOffset = (uint32_t)-1; + uintX_t AbbrevShndx = (uint32_t)-1; + llvm::DenseMap> Codes; +}; + +// Linker extracts the public names from the .debug_gnu_pubnames and +// .debug_gnu_pubtypes sections. This is helper class for that. 
+template class DwarfPubTable { + typedef typename ELFT::uint uintX_t; + + StringRef SectionName; + std::unique_ptr> RelMapper; + InputSection *Section = nullptr; + + ArrayRef Content; + const uint32_t OffsetSize = 4; + +public: + DwarfPubTable(StringRef Name) : SectionName(Name) {} + bool locateSection(elf::ObjectFile &File); + + StringRef getNextName(uint8_t &Flag); + + std::pair> readHeader(uintX_t Offset); +}; + +// Main class that is used to scan all .debug_info sections. +template class DwarfInfoReader { + typedef typename ELFT::uint uintX_t; + + ArrayRef Data; + GdbIndexSection &GdbIndex; + + elf::ObjectFile *PubObject = nullptr; + DwarfPubTable PubNameTable; + llvm::DenseMap CuPubNameMap; + DwarfPubTable PubTypeTable; + llvm::DenseMap CuPubTypeMap; + + void visitTopDie(DwarfDieParser &Die, uintX_t CuOffset); + + void readAddressArea(DwarfDieParser &Die); + bool readPubNamesAndTypes(DwarfDieParser &Die, uintX_t CuOffset); + void mapPubnamesAndTypesToDies(elf::ObjectFile &File); + bool readPubTable(DwarfPubTable &Table, uintX_t Offset); + uintX_t findPubTableOffset(uintX_t Offset, + llvm::DenseMap &Map); + void addSymbol(StringRef Name, uint8_t Flags); + +public: + DwarfInfoReader(GdbIndexSection &GdbIndex, InputSection *Sec) + : GdbIndex(GdbIndex), DebugInfoSec(Sec), PubNameTable("pubnames"), + PubTypeTable("pubtypes") {} + void addToGdbIndex(); + + ArrayRef getDataAtOffset(uintX_t CuOffset, uintX_t Offset); + + DwarfAbbrevTable AbbrevTable; + uint8_t AddressSize; + InputSection *DebugInfoSec; +}; + +struct DieAttribute { + uint64_t Attr; + uint64_t Form; + + DieAttribute(uint64_t Attr, uint64_t Form) : Attr(Attr), Form(Form) {} + + union { + int64_t IntVal; + uint64_t UintVal; + uint64_t RefVal; + } Value; + + unsigned SecNdx = 0; +}; + +// DWARF uses a series of debugging information entries (DIEs) to define a +// low-level representation of a source program. Each debugging information +// entry consists of an identifying tag and a series of attributes. 
This class +// is used to parce DIEs. +template class DwarfDieParser { + typedef typename ELFT::uint uintX_t; + + const DwarfInfoReader &Reader; + RelocMapper &Mapper; + + void readAttributes(uintX_t AttributesOffset, ArrayRef Attributes); + DieAttribute *findAttribute(uint64_t Attr); + + uintX_t CuOffset; + uintX_t DieOffset; + +public: + DwarfDieParser(DwarfInfoReader &Reader, RelocMapper &Mapper, + uintX_t CuOffset, uintX_t DieOffset); + + uintX_t getRefAttribute(uint64_t Attr); + uintX_t getAddressAttribute(uint64_t Attr, unsigned *SecNdx = nullptr); + uint64_t getUintAttribute(uint64_t Attr); + + AbbreviationCode *AbbreviationEntry; + std::vector Attributes; +}; + +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + size_t CuVectorIndex; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name. +class GdbHashTab final { +public: + std::pair add(uint32_t Hash, size_t Offset); + + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector Table; + + // Size keeps the amount of filled entries in Table. + size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; + +} // namespace elf +} // namespace lld + +#endif Index: ELF/SplitDebugInfo.cpp =================================================================== --- ELF/SplitDebugInfo.cpp +++ ELF/SplitDebugInfo.cpp @@ -0,0 +1,727 @@ +//===- SplitDebugInfo.cpp -------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// File contains classes for implementation of --gdb-index command line option. +// +// The amount of debug information in a large application can be very large. That +// can lead to possible linker slowdown, out of memory at link time, and slow gdb +// debugger startup. In a large C++ application debug information accounts for up to +// 87% of object size sent to linker under certain circumstances. +// +// Split debug information feature tries to solve the problems caused by huge +// amounts of debug information in large applications. +// By splitting the debug information into two parts at compile time - one part +// that remains in the .o file and another part that is written to a parallel +// .dwo ("DWARF object") file - we can reduce the total size of the object +// files processed by the linker. +// +// Feature was first implemented in GCC 4.7 and required support from gold +// linker, which had to implement the --gdb-index option. As a result, the linker creates +// a .gdb_index section that allows GDB to find and read the .dwo files as it +// needs them. More specific information can be found at +// https://gcc.gnu.org/wiki/DebugFission. +// +// .gdb_index section format +// Detailed info: +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html +// +// A mapped index consists of several areas, laid out in order: +// 1) The file header. +// 2) The CU (compilation unit) list. This is a sequence of pairs of 64-bit +// little-endian values, sorted by the CU offset. The first element in each +// pair is the offset of a CU in the .debug_info section. The second element +// in each pair is the length of that CU. References to a CU elsewhere in the +// map are done using a CU index, which is just the 0-based index into this +// table.
Note that if there are type CUs, then conceptually CUs and type CUs +// form a single list for the purposes of CU indices. +// 3) The types CU list. This is a sequence of triplets of 64-bit little-endian +// values. In a triplet, the first value is the CU offset, the second value +// is the type offset in the CU, and the third value is the type signature. +// The types CU list is not sorted. +// 4) The address area. The address area consists of a sequence of address +// entries. +// 5) The symbol table. This is an open-addressed hash table. The size of the +// hash table is always a power of 2. Each slot in the hash table consists of +// a pair of offset_type values. The first value is the offset of the +// symbol's +// name in the constant pool. The second value is the offset of the CU vector +// in the constant pool. +// 6) The constant pool. This is simply a bunch of bytes. It is organized so +// that alignment is correct: CU vectors are stored first, followed by +// strings. +// +// For constructing the .gdb_index section the following steps are performed by lld: +// 1) For the file header nothing special should be done. It contains the offsets to +// the areas below. +// 2) Scan the compilation unit headers of the .debug_info sections to build a +// list of compilation units. +// 3) Types CU list area can be removed in a future +// release of the format (and does not appear in the DWARF v5 specification), as +// skeleton type units are no longer needed by GDB +// (see https://gcc.gnu.org/wiki/DebugFission). +// lld does nothing to support parsing of .debug_types because of +// that and the types CU area is empty in the resulting .gdb_index section. +// 4) Address area entries are extracted from DW_TAG_compile_unit DIEs of +// .debug_info sections. +// 5) For building the symbol table the linker extracts the public names from the +// .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the +// hash table according to the .gdb_index format specification.
+// 6) Constant pool is populated at the same time as symbol table. +//===----------------------------------------------------------------------===// + +#include "SplitDebugInfo.h" +#include "Error.h" +#include "OutputSections.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace lld; +using namespace lld::elf; + +template +RelocMapper::RelocMapper(InputSection *Sec) + : Sec(Sec), RelSec(*Sec->RelocSections[0]), EObj(Sec->getFile()->getObj()) { +} + +template +typename ELFT::uint RelocMapper::lookupReloc(uintX_t RelOffset, + uintX_t *TargetOffset) { + advanceToOffset(RelOffset); + + if (RelOffset != getNextRelocOffset()) + return 0; + + elf::ObjectFile *File = Sec->getFile(); + const Elf_Shdr *SymTab = File->getSymbolTable(); + const Elf_Sym *Sym = + File->getObj().getSymbol(SymTab, getNextRelocSymbolIndex()); + + *TargetOffset = Sym->st_value + getNextRelocAddend(); + return Sym->st_shndx; +} + +template +typename ELFT::uint RelocMapper::getNextRelocAddend() { + if (RelSec.sh_type == SHT_RELA) + return EObj.relas(&RelSec)[Position].r_addend; + else + return 0; +} + +template +typename ELFT::uint RelocMapper::getNextRelocOffset() { + if (RelSec.sh_type == SHT_RELA) { + ArrayRef Relas = EObj.relas(&RelSec); + if (Position >= Relas.size()) + return (uintX_t)-1; + return Relas[Position].r_offset; + } + + ArrayRef Rels = EObj.rels(&RelSec); + if (Position >= Rels.size()) + return (uintX_t)-1; + return Rels[Position].r_offset; +} + +template +typename ELFT::uint RelocMapper::getNextRelocSymbolIndex() { + if (RelSec.sh_type == SHT_RELA) { + ArrayRef Relas = EObj.relas(&RelSec); + if (Position >= Relas.size()) + return (uintX_t)-1; + return Relas[Position].getSymbol(Config->Mips64EL); + } + + ArrayRef Rels = EObj.rels(&RelSec); + if (Position >= 
Rels.size()) + return (uintX_t)-1; + return Rels[Position].getSymbol(Config->Mips64EL); +} + +template +template +void RelocMapper::advance(ArrayRef Rel, uintX_t Offset) { + for (size_t E = Rel.size(); Position != E; ++Position) + if (Rel[Position].r_offset >= Offset) + break; +} + +template void RelocMapper::advanceToOffset(uintX_t Offset) { + if (RelSec.sh_type == SHT_RELA) + return advance(EObj.relas(&RelSec), Offset); + else + return advance(EObj.rels(&RelSec), Offset); +} + +template +void DwarfAbbrevTable::read(elf::ObjectFile *File, uintX_t Shndx, + uintX_t Offset) { + // Multiple debugging information entries may share the same abbreviation + // table entry. Each compilation unit is associated with a particular + // abbreviation table, but multiple compilation units may share the same + // table. If we already read this table then just return. + if (AbbrevOffset == Offset && this->AbbrevShndx == Shndx) + return; + AbbrevOffset = Offset; + AbbrevShndx = Shndx; + + if (InputSectionBase *Sec = File->getSection(AbbrevShndx)) + Data = Sec->getSectionData(); + else + fatal("error retriving .debug_abbrev section"); +} + +template +AbbreviationCode *DwarfAbbrevTable::getEntry(uint32_t Code) { + auto It = Codes.find(Code); + if (It != Codes.end()) + return It->second.get(); + + // Read and store abbrev code definitions until we find the + // one we're looking for. + while (true) { + // Each declaration begins with an unsigned LEB128 number representing the + // abbreviation code itself. + unsigned N; + uint64_t NextCode = decodeULEB128(Data.data(), &N); + if (NextCode == 0) + return nullptr; + Data = Data.drop_front(N); + + // The abbreviation code is followed by another + // unsigned LEB128 number that encodes the entry痴 tag. + uint64_t Tag = decodeULEB128(Data.data(), &N); + Data = Data.drop_front(N); + + // Following the tag encoding is a 1 byte value that determines whether a + // debugging information entry using this abbreviation has child entries. 
+ Data = Data.drop_front(1); + + std::unique_ptr NewCode = + std::make_unique(Tag); + while (true) { + uint64_t Attr = decodeULEB128(Data.data(), &N); + Data = Data.drop_front(N); + uint64_t Form = decodeULEB128(Data.data(), &N); + Data = Data.drop_front(N); + + // The series of attribute specifications ends with an entry containing 0 + // for the name and 0 for the form. + if (Attr == 0 && Form == 0) + break; + NewCode->Attributes.push_back(std::make_pair(Attr, Form)); + } + + AbbreviationCode *Ret = NewCode.get(); + Codes[Tag].swap(NewCode); + if (NextCode == Code) + return Ret; + } + + return nullptr; +} + +// For each compilation unit compiled with a DWARF producer, a contribution is +// made to the .debug_info section of the object file. Each such contribution +// consists of a compilation unit header followed by a single +// DW_TAG_compile_unit. (Refer to "DWARF Debugging Information Format V4", 7.5 +// Format of Debugging Information). +// Method is used to parse .debug_info input sections to extract information +// needed to produce .gdb_index output section. +template void DwarfInfoReader::addToGdbIndex() { + const endianness E = ELFT::TargetEndianness; + if (DebugInfoSec->RelocSections.size() != 1) { + error(".debug_info should have single relocation section"); + return; + } + + mapPubnamesAndTypesToDies(*DebugInfoSec->getFile()); + + RelocMapper RelocMapper(DebugInfoSec); + Data = DebugInfoSec->getSectionData(); + const uint8_t *PInfo = Data.begin(); + + while (PInfo < Data.end()) { + const uint8_t *CuStart = PInfo; + + // A 4-byte or 12-byte unsigned integer representing the length of the + // .debug_info contribution for that compilation unit, not including the + // length field itself. 
+ uint32_t UnitLength = read(PInfo); + PInfo += sizeof(uint32_t); + if (UnitLength == (uint32_t)-1) + fatal("64-bits DWARF is not supported"); + + const uint8_t *CuEnd = PInfo + UnitLength; + + // A 2-byte unsigned integer representing the version of the DWARF + // information for the compilation unit. + uint16_t Version = read(PInfo); + if (Version != 4) + fatal("unsupported version of .debug_info"); + PInfo += sizeof(uint16_t); + + // Offset into the.debug_abbrev section.This offset associates the + // compilation unit with a particular set of debugging information entry + // abbreviations. 4 byte unsigned length for 32-bit DWARF. + uint32_t AbbrevOffset = read(PInfo); + + // Obtain a index of .debug_abbrev section. We use current offset + // in compilation unit that points to AbbrevOffset to search relocation + // and retrive target section index which is index of .debug_abbrev. + uintX_t AbbrevShndx = 0; + uintX_t RelocOffset = PInfo - Data.begin(); + AbbrevShndx = RelocMapper.lookupReloc(RelocOffset, &AbbrevShndx); + if (!AbbrevShndx) + fatal(".debug_abbrev section index not found"); + PInfo += sizeof(uint32_t); + + AddressSize = *PInfo++; + + // Retrive the .debug_abbrev table data. + AbbrevTable.read(DebugInfoSec->getFile(), AbbrevShndx, AbbrevOffset); + + uintX_t CuOffset = CuStart - Data.begin(); + + // When using -gsplit-dwarf, .debug_info section contains a single + // DW_TAG_compile_unit DIE, with no children. Here we create DIE parcer + // that reads abbreviation entry and list of atributes. + DwarfDieParser RootDie(*this, RelocMapper, CuOffset, PInfo - CuStart); + + // The abbreviation code 0 is reserved. Debugging information entries + // consisting of only the abbreviation code 0 are considered null entries, + // skip them. 
+ if (RootDie.AbbreviationEntry && RootDie.AbbreviationEntry->Tag != 0) { + GdbIndex.CompilationUnits.push_back( + std::make_pair(DebugInfoSec->OutSecOff + CuOffset, CuEnd - CuStart)); + visitTopDie(RootDie, CuOffset); + } + + PInfo = CuEnd; + } +} + +// Debugging information entry may have a machine code +// address or range of machine code addresses information. +// Method reads and stores that for futher .gdb_index building. +template +void DwarfInfoReader::readAddressArea(DwarfDieParser &Die) { + uintX_t Ranges = Die.getRefAttribute(dwarf::DW_AT_ranges); + if (Ranges != (uintX_t)-1) + fatal("DW_AT_ranges support not implemented"); + + unsigned SecNdx; + uintX_t LowPc = Die.getAddressAttribute(dwarf::DW_AT_low_pc, &SecNdx); + uintX_t HighPc = Die.getAddressAttribute(dwarf::DW_AT_high_pc); + + if (LowPc == (uintX_t)-1) + return; + + // Since DWARF4, DW_AT_high_pc may also be of class constant, in which case + // it represents function size. + if (HighPc == (uintX_t)-1) + HighPc = LowPc + Die.getUintAttribute(dwarf::DW_AT_high_pc); + + uint32_t CuUnitId = GdbIndex.CompilationUnits.size() - 1; + InputSectionBase *Sec = DebugInfoSec->getFile()->getSection(SecNdx); + GdbIndex.AddressArea.push_back({Sec, LowPc, HighPc, CuUnitId}); +} + +// Locates the .debug_gnu_[pub_names/pub_types] section in a File. +template +bool DwarfPubTable::locateSection(elf::ObjectFile &File) { + for (InputSectionBase *S : File.getSections()) { + if (!S || S == &InputSection::Discarded) + continue; + StringRef Prefix = ".debug_gnu_"; + if (!S->Name.startswith(Prefix)) + continue; + if (SectionName != S->Name.drop_front(Prefix.size())) + continue; + Section = dyn_cast>(S); + RelMapper = llvm::make_unique>(Section); + return true; + } + return false; +} + +// Each .debug_pub* section consists of sets of variable length entries, +// each such set has a header, this method is used to parse it. 
+template +std::pair> +DwarfPubTable::readHeader(uintX_t Offset) { + const endianness E = ELFT::TargetEndianness; + + ArrayRef Data = Section->getSectionData(); + if (Offset + 14 >= Data.size()) + return {false, {}}; + Data = Data.drop_front(Offset); + ArrayRef Begin = Data; + + uint32_t UnitLength = read32(Data.data()); + Data = Data.drop_front(4); + if (UnitLength == (uint32_t)-1) + fatal("64-bit DWARF format is not supported"); + UnitLength += 4; + + uint16_t Version = read16(Data.data()); + Data = Data.drop_front(2); + if (Version != 2) + fatal("unsupported version of .debug_pubnames or .debug_pubtypes"); + + uintX_t CuOffset; + RelMapper->lookupReloc(Data.data() - Begin.data(), &CuOffset); + + // Skip the debug_info_offset and debug_info_size fields. + Data = Data.drop_front(2 * OffsetSize); + Content = Data; + + return {true, {UnitLength, CuOffset}}; +} + +template +StringRef DwarfPubTable::getNextName(uint8_t &Flag) { + if (Content.size() <= OffsetSize) + return {}; + + Content = Content.drop_front(OffsetSize); + + Flag = Content[0]; + Content = Content.drop_front(); + + ArrayRef Ret = Content; + size_t End = llvm::find(Ret, 0) - Ret.begin(); + Content = Ret.drop_front(End + 1); + + Ret = Ret.drop_back(Content.size()); + return {(const char *)Ret.data(), Ret.size()}; +} + +// For lookup by name, two tables are maintained in separate object file +// sections named .debug_pubnames for objects and functions, and .debug_pubtypes +// for types. Each table consists of sets of variable length entries. Each set +// describes the names of global objects and functions, or global types, +// respectively, whose definitions are represented by debugging information +// entries owned by a single compilation unit. +// Method performs mapping of these sets to compilation units offsets, +// so names and types can be read later. 
+template +void DwarfInfoReader::mapPubnamesAndTypesToDies( + elf::ObjectFile &File) { + auto CreateMap = [&](DwarfPubTable &Table, + llvm::DenseMap &Out) { + Out.clear(); + if (Table.locateSection(File)) { + uintX_t Off = 0; + std::pair> Result = + Table.readHeader(Off); + while (Result.first) { + uintX_t UnitLength = Result.second.first; + uintX_t CuOffset = Result.second.second; + Out[CuOffset] = Off; + Off += UnitLength; + Result = Table.readHeader(Off); + } + } + }; + + CreateMap(PubNameTable, CuPubNameMap); + CreateMap(PubTypeTable, CuPubTypeMap); +} + +template +typename ELFT::uint DwarfInfoReader::findPubTableOffset( + uintX_t Offset, llvm::DenseMap &Map) { + auto I = Map.find(Offset); + if (I != Map.end()) + return I->second; + return (uintX_t)-1; +} + +// Iterative hash function for symbol's name is described at +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html +static uint32_t hash(const char *Str) { + uint32_t R = 0; + uint8_t C; + while ((C = *Str++) != 0) { + C = tolower(C); + R = R * 67 + C - 113; + } + return R; +} + +// Fills the symbol table and CU vector portion of the constant pool. +template +void DwarfInfoReader::addSymbol(StringRef Name, uint8_t Flags) { + uint32_t Hash = hash(Name.data()); + size_t Offset = GdbIndex.StringPool.add(Name); + + bool IsNew; + GdbSymbol *Sym; + std::tie(IsNew, Sym) = GdbIndex.SymbolTable.add(Hash, Offset); + + uintX_t CuIndex = GdbIndex.CompilationUnits.size() - 1; + if (IsNew) { + Sym->CuVectorIndex = GdbIndex.CuVectors.size(); + GdbIndex.CuVectors.push_back({{CuIndex, Flags}}); + return; + } + + std::vector> &CuVec = + GdbIndex.CuVectors[Sym->CuVectorIndex]; + CuVec.push_back({CuIndex, Flags}); +} + +// Reads the single .debug_pubnames/.debug_pubtypes section content at +// specified offset and calls addSymbol() for each name/type found. 
+//
+// Returns false when readHeader() reports no valid header at Offset and true
+// otherwise.
+// NOTE(review): the template argument lists in this hunk were stripped when
+// the patch was extracted. They are restored below from how each value is
+// used; DwarfPubTable and RelocMapper are assumed to be parameterised on
+// ELFT - TODO confirm against the original revision.
+template <class ELFT>
+bool DwarfInfoReader<ELFT>::readPubTable(DwarfPubTable<ELFT> &Table,
+                                         uintX_t Offset) {
+  auto Header = Table.readHeader(Offset);
+  if (!Header.first)
+    return false;
+
+  // getNextName() signals the end of the set with an empty name.
+  while (true) {
+    uint8_t Flag;
+    StringRef Name = Table.getNextName(Flag);
+    if (Name.empty())
+      break;
+    addSymbol(Name, Flag);
+  }
+  return true;
+}
+
+// Parses the .debug_pubnames and .debug_pubtypes sections.
+// Reports an error and returns false when the CU DIE has no (or a zero)
+// DW_AT_GNU_pubnames attribute. Otherwise returns true if at least one of the
+// two tables had an entry for the CU at CuOffset and it was read.
+template <class ELFT>
+bool DwarfInfoReader<ELFT>::readPubNamesAndTypes(DwarfDieParser<ELFT> &Die,
+                                                 uintX_t CuOffset) {
+  assert(Die.AbbreviationEntry->Tag == dwarf::DW_TAG_compile_unit &&
+         "only DW_TAG_compile_unit is supported");
+
+  // getUintAttribute() answers (uintX_t)-1 - a non-zero value - when the
+  // attribute is absent, so testing the result with '!' alone can never
+  // detect a missing attribute.
+  uint64_t HasPubNames = Die.getUintAttribute(dwarf::DW_AT_GNU_pubnames);
+  if (!HasPubNames || HasPubNames == (uintX_t)-1) {
+    error("unable to find DW_AT_GNU_pubnames attribute");
+    return false;
+  }
+
+  bool ReadNames = false;
+  bool ReadTypes = false;
+  uintX_t Offset = findPubTableOffset(CuOffset, CuPubNameMap);
+  if (Offset != (uintX_t)-1)
+    ReadNames = readPubTable(PubNameTable, Offset);
+
+  Offset = findPubTableOffset(CuOffset, CuPubTypeMap);
+  if (Offset != (uintX_t)-1)
+    ReadTypes = readPubTable(PubTypeTable, Offset);
+
+  return ReadNames || ReadTypes;
+}
+
+// Processes the top level DIE of the CU at CuOffset: reads its address area
+// and its public names/types. Anything other than DW_TAG_compile_unit, or a
+// failure to read the pub tables, is fatal.
+template <class ELFT>
+void DwarfInfoReader<ELFT>::visitTopDie(DwarfDieParser<ELFT> &Die,
+                                        uintX_t CuOffset) {
+  if (Die.AbbreviationEntry->Tag != dwarf::DW_TAG_compile_unit)
+    fatal("only DW_TAG_compile_unit is supported for top level DIE");
+
+  readAddressArea(Die);
+  if (!readPubNamesAndTypes(Die, CuOffset))
+    fatal("error reading pubnames and/or pubtypes section");
+}
+
+// Returns the tail of Data that starts CuOffset + Offset bytes in.
+template <class ELFT>
+ArrayRef<uint8_t> DwarfInfoReader<ELFT>::getDataAtOffset(uintX_t CuOffset,
+                                                         uintX_t Offset) {
+  return Data.drop_front(CuOffset + Offset);
+}
+
+// Parses the DIE located DieOffset bytes into the CU at CuOffset: decodes the
+// abbreviation code, looks up the matching abbreviation entry and reads the
+// attribute values that follow. An abbreviation code of 0 is fatal.
+template <class ELFT>
+DwarfDieParser<ELFT>::DwarfDieParser(DwarfInfoReader<ELFT> &Reader,
+                                     RelocMapper<ELFT> &Mapper,
+                                     uintX_t CuOffset, uintX_t DieOffset)
+    : Reader(Reader), Mapper(Mapper), CuOffset(CuOffset), DieOffset(DieOffset) {
+  ArrayRef<uint8_t> Data = Reader.getDataAtOffset(CuOffset, DieOffset);
+
+  // Each debugging information entry begins with an unsigned LEB128 number
+  // containing the abbreviation code for the entry.
+  // This code represents an entry within the abbreviations table
+  // associated with the compilation unit containing this entry.
+  unsigned N;
+  uint64_t C = decodeULEB128(Data.data(), &N);
+  if (!C)
+    fatal("the abbreviation codes 0 are considered null entries");
+
+  // Now get the entry from abbreviation table.
+  AbbreviationEntry = Reader.AbbrevTable.getEntry(C);
+
+  // The abbreviation code is followed by a series of attribute values.
+  readAttributes(N, Data.drop_front(N));
+}
+
+// Decodes one value per attribute listed in AbbreviationEntry from Buf and
+// appends the results to Attributes. AttributesOffset is the distance from
+// the start of the DIE to the first attribute value; it is used to compute
+// the offset handed to Mapper.lookupReloc(). Forms other than the ones
+// handled below are fatal, as is a value that does not fit in what is left
+// of Buf.
+template <class ELFT>
+void DwarfDieParser<ELFT>::readAttributes(uintX_t AttributesOffset,
+                                          ArrayRef<uint8_t> Buf) {
+  const endianness E = ELFT::TargetEndianness;
+
+  // Stops with a fatal error instead of reading past the end of Buf when an
+  // attribute value is cut short.
+  auto RequireBytes = [&](size_t Bytes) {
+    if (Buf.size() < Bytes)
+      fatal("truncated DWARF attribute value");
+  };
+
+  size_t BufLen = Buf.size();
+  size_t Count = AbbreviationEntry->Attributes.size();
+  for (size_t I = 0; I < Count; ++I) {
+    uint64_t Attr = AbbreviationEntry->Attributes[I].first;
+    uint64_t Form = AbbreviationEntry->Attributes[I].second;
+
+    DieAttribute A(Attr, Form);
+    // BufLen - Buf.size() is the number of bytes consumed so far.
+    uintX_t FullAttribOffset =
+        CuOffset + DieOffset + AttributesOffset + BufLen - Buf.size();
+
+    switch (Form) {
+    case dwarf::DW_FORM_sec_offset:
+    case dwarf::DW_FORM_strp: {
+      RequireBytes(4);
+      uintX_t SecOffset = read32<E>(Buf.data());
+      Buf = Buf.drop_front(4);
+      A.SecNdx = Mapper.lookupReloc(FullAttribOffset, &SecOffset);
+      A.Value.RefVal = SecOffset;
+      break;
+    }
+    case dwarf::DW_FORM_addr: {
+      // Four bytes are read when AddressSize is 4 and eight otherwise, but
+      // AddressSize bytes are consumed; require enough room for both.
+      size_t ReadSize = Reader.AddressSize == 4 ? 4 : 8;
+      RequireBytes(std::max<size_t>(ReadSize, Reader.AddressSize));
+      uintX_t SecOffset = 0;
+      if (Reader.AddressSize == 4)
+        SecOffset = read32<E>(Buf.data());
+      else
+        SecOffset = read64<E>(Buf.data());
+      Buf = Buf.drop_front(Reader.AddressSize);
+      A.SecNdx = Mapper.lookupReloc(FullAttribOffset, &SecOffset);
+      A.Value.RefVal = SecOffset;
+      break;
+    }
+    case dwarf::DW_FORM_data8: {
+      RequireBytes(8);
+      // NOTE(review): the 64-bit value is held in a uintX_t, so it looks
+      // like it is narrowed when uintX_t is 32 bits wide - confirm that is
+      // intended.
+      uintX_t SecOffset = read64<E>(Buf.data());
+      Buf = Buf.drop_front(8);
+      A.SecNdx = Mapper.lookupReloc(FullAttribOffset, &SecOffset);
+      A.Value.UintVal = SecOffset;
+      break;
+    }
+    case dwarf::DW_FORM_flag_present:
+      // The flag occupies no bytes; its presence is the value.
+      A.Value.IntVal = 1;
+      break;
+    default:
+      fatal("unsupported DW_FORM_* type");
+    }
+
+    Attributes.push_back(A);
+  }
+}
+
+// Returns the reference value of attribute Attr, or (uintX_t)-1 when the DIE
+// has no such attribute. Only DW_FORM_ref_addr and DW_FORM_sec_offset are
+// accepted; any other form is fatal.
+template <class ELFT>
+typename ELFT::uint DwarfDieParser<ELFT>::getRefAttribute(uint64_t Attr) {
+  DieAttribute *A = findAttribute(Attr);
+  if (!A)
+    return (uintX_t)-1;
+
+  switch (A->Form) {
+  case dwarf::DW_FORM_ref_addr:
+  case dwarf::DW_FORM_sec_offset:
+    return A->Value.RefVal;
+  default:
+    fatal("unsupported DW_FORM_* type");
+  }
+
+  return 0;
+}
+
+// Returns the value of attribute Attr if it is present with DW_FORM_addr and
+// (uintX_t)-1 otherwise. When SecNdx is not null it receives the section
+// index recorded for the attribute.
+template <class ELFT>
+typename ELFT::uint
+DwarfDieParser<ELFT>::getAddressAttribute(uint64_t Attr, unsigned *SecNdx) {
+  DieAttribute *A = findAttribute(Attr);
+  if (!A || A->Form != dwarf::DW_FORM_addr)
+    return (uintX_t)-1;
+  if (SecNdx)
+    *SecNdx = A->SecNdx;
+  return A->Value.RefVal;
+}
+
+// Returns the integer value of attribute Attr for DW_FORM_flag_present and
+// DW_FORM_data8; any other form is fatal. When the DIE has no such attribute
+// the result is (uintX_t)-1 - which is non-zero, so callers must compare
+// against that value rather than test for zero.
+template <class ELFT>
+uint64_t DwarfDieParser<ELFT>::getUintAttribute(uint64_t Attr) {
+  DieAttribute *A = findAttribute(Attr);
+  if (!A)
+    return (uintX_t)-1;
+  switch (A->Form) {
+  case dwarf::DW_FORM_flag_present:
+    return A->Value.IntVal;
+  case dwarf::DW_FORM_data8:
+    return A->Value.UintVal;
+  default:
+    fatal("not implemented DW_FORM_*");
+  }
+}
+
+// Returns the first parsed attribute whose code equals Attr, or nullptr when
+// there is none.
+template <class ELFT>
+DieAttribute *DwarfDieParser<ELFT>::findAttribute(uint64_t Attr) {
+  auto I = std::find_if(
+      Attributes.begin(), Attributes.end(),
+      [=](const DieAttribute &Attribute) { return Attribute.Attr == Attr; });
+  if (I == Attributes.end())
+    return nullptr;
+  return &*I;
+}
+
+// Looks up the symbol identified by (Hash, Offset), creating it if the slot
+// is still empty. Returns {true, symbol} when a new symbol was created and
+// {false, symbol} when it was already present. The table is grown first when
+// Size * 4 / 3 has reached the number of slots.
+std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) {
+  if (Size * 4 / 3 >= Table.size())
+    expand();
+
+  GdbSymbol **Slot = findSlot(Hash, Offset);
+  bool New = false;
+  if (*Slot == nullptr) {
+    ++Size;
+    *Slot = new (Alloc) GdbSymbol(Hash, Offset);
+    New = true;
+  }
+  return {New, *Slot};
+}
+
+// Grows the table: an empty table is sized to InitialSize, any other table
+// is doubled and every existing symbol is re-inserted into its new slot.
+void GdbHashTab::expand() {
+  if (Table.empty()) {
+    Table.resize(InitialSize);
+    return;
+  }
+  // After the swap, Table is the new empty table of twice the size and
+  // NewTable holds the previous contents that are re-inserted below.
+  std::vector<GdbSymbol *> NewTable(Table.size() * 2);
+  NewTable.swap(Table);
+
+  for (GdbSymbol *Sym : NewTable) {
+    if (!Sym)
+      continue;
+    GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset);
+    *Slot = Sym;
+  }
+}
+
+// Finds a slot for the symbol with the given hash.
+// The step size used to find the next candidate
+// slot when handling a hash collision is specified in .gdb_index section format
+// (https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html)
+//
+// Returns the address of the slot that already holds the symbol with this
+// Hash and name Offset, or of the first empty slot met while probing. The
+// index arithmetic masks with Table.size() - 1, so the table has to be
+// non-empty and a power of two in size.
+GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) {
+  assert(!Table.empty() && (Table.size() & (Table.size() - 1)) == 0 &&
+         "hash table size must be a non-zero power of two");
+  const uint32_t Mask = Table.size() - 1;
+  uint32_t Index = Hash & Mask;
+  // Forcing the low bit on keeps the step odd.
+  uint32_t Step = ((Hash * 17) & Mask) | 1;
+
+  for (;;) {
+    GdbSymbol *S = Table[Index];
+    if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash)))
+      return &Table[Index];
+    Index = (Index + Step) & Mask;
+  }
+}
+
+// NOTE(review): the template arguments of these four explicit instantiations
+// were stripped when the patch was extracted. The four ELF flavours below are
+// an assumption - TODO confirm against the original revision.
+template class elf::DwarfInfoReader<ELF32LE>;
+template class elf::DwarfInfoReader<ELF32BE>;
+template class elf::DwarfInfoReader<ELF64LE>;
+template class elf::DwarfInfoReader<ELF64BE>;
Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -138,6 +138,7 @@ std::unique_ptr> DynStrTab; std::unique_ptr> DynSymTab; std::unique_ptr> EhFrameHdr; + std::unique_ptr> GdbIndex; std::unique_ptr> GnuHashTab; std::unique_ptr> GotPlt; std::unique_ptr> HashTab; @@ -173,6 +174,8 @@ GnuHashTab.reset(new GnuHashTableSection); if (Config->SysvHash) HashTab.reset(new HashTableSection); + if (Config->GdbIndex) + GdbIndex.reset(new GdbIndexSection); StringRef S = Config->Rela ? ".rela.plt" : ".rel.plt"; GotPlt.reset(new GotPltSection); RelaPlt.reset(new RelocationSection(S, false /*Sort*/)); @@ -200,6 +203,7 @@ Out::Dynamic = &Dynamic; Out::EhFrame = &EhFrame; Out::EhFrameHdr = EhFrameHdr.get(); + Out::GdbIndex = GdbIndex.get(); Out::GnuHashTab = GnuHashTab.get(); Out::Got = &Got; Out::GotPlt = GotPlt.get(); @@ -707,6 +711,9 @@ Out::InitArray = findSection(".init_array"); Out::FiniArray = findSection(".fini_array"); + if (Out::GdbIndex) + Out::GdbIndex->DebugInfoSec = findSection(".debug_info"); + // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop // symbols for sections, so that the runtime can get the start and end // addresses of each section by section name.
Add such symbols. @@ -840,6 +847,7 @@ // This order is not the same as the final output order // because we sort the sections using their attributes below. + Add(Out::GdbIndex); Add(Out::SymTab); Add(Out::ShStrTab); Add(Out::StrTab); Index: test/ELF/gdb-index.s =================================================================== --- test/ELF/gdb-index.s +++ test/ELF/gdb-index.s @@ -0,0 +1,80 @@ +## gdb-index-a.elf and gdb-index-b.elf are a test.o and test2.o renamed, +## were generated in a next way: +## test.cpp: +## double foo1; +## float bar1; +## void method1() {} +## int main() { return 0; } +## test2.cpp: +## double foo2; +## char method2() {} +## Compiled with: +## gcc -gsplit-dwarf -c test.cpp test2.cpp +## gcc version 5.3.1 20160413 +## Info about gdb-index: https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html + +# REQUIRES: x86 +# RUN: ld.lld --gdb-index -e main %p/Inputs/gdb-index-a.elf %p/Inputs/gdb-index-b.elf -o %t +# RUN: llvm-dwarfdump -debug-dump=gdb_index %t | FileCheck %s +# RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=DISASM + +# DISASM: Disassembly of section .text: +# DISASM-NEXT: _Z7method1v: +# DISASM-NEXT: 11000: +# DISASM-NEXT: 11001: +# DISASM-NEXT: 11004: +# DISASM-NEXT: 11005: +# DISASM-NEXT: 11006: +# DISASM: main: +# DISASM-NEXT: 11007: +# DISASM-NEXT: 11008: +# DISASM-NEXT: 1100b: +# DISASM-NEXT: 11010: +# DISASM-NEXT: 11011: +# DISASM: _Z7method2v: +# DISASM-NEXT: 11012: +# DISASM-NEXT: 11013: +# DISASM-NEXT: 11016: +# DISASM-NEXT: 11017: +# DISASM-NEXT: 11018: + +# CHECK: .gnu_index contents: +# CHECK-NEXT: Version = 7 +# CHECK: CU list offset = 0x18, has 2 entries: +# CHECK-NEXT: 0: Offset = 0x0, Length = 0x34 +# CHECK-NEXT: 1: Offset = 0x34, Length = 0x34 +# CHECK: Address area offset = 0x38, has 2 entries: +# CHECK-NEXT: Low address = 0x11000, High address = 0x11012, CU index = 0 +# CHECK-NEXT: Low address = 0x11012, High address = 0x11019, CU index = 1 +# CHECK: Symbol table offset = 0x60, size = 1024, 
filled slots: +# CHECK-NEXT: 158: Name offset = 0x66, CU vector offset = 0x18 +# CHECK-NEXT: String name: bar1, CU vector index: 3 +# CHECK-NEXT: 191: Name offset = 0x61, CU vector offset = 0x10 +# CHECK-NEXT: String name: foo1, CU vector index: 2 +# CHECK-NEXT: 192: Name offset = 0x84, CU vector offset = 0x44 +# CHECK-NEXT: String name: foo2, CU vector index: 8 +# CHECK-NEXT: 447: Name offset = 0x54, CU vector offset = 0x0 +# CHECK-NEXT: String name: method1, CU vector index: 0 +# CHECK-NEXT: 448: Name offset = 0x7c, CU vector offset = 0x3c +# CHECK-NEXT: String name: method2, CU vector index: 7 +# CHECK-NEXT: 489: Name offset = 0x5c, CU vector offset = 0x8 +# CHECK-NEXT: String name: main, CU vector index: 1 +# CHECK-NEXT: 511: Name offset = 0x76, CU vector offset = 0x34 +# CHECK-NEXT: String name: float, CU vector index: 6 +# CHECK-NEXT: 518: Name offset = 0x89, CU vector offset = 0x4c +# CHECK-NEXT: String name: char, CU vector index: 9 +# CHECK-NEXT: 754: Name offset = 0x6b, CU vector offset = 0x20 +# CHECK-NEXT: String name: int, CU vector index: 4 +# CHECK-NEXT: 977: Name offset = 0x6f, CU vector offset = 0x28 +# CHECK-NEXT: String name: double, CU vector index: 5 +# CHECK: Constant pool offset = 0x2060, has 10 CU vectors: +# CHECK-NEXT: 0(0x0): 0x30000000 +# CHECK-NEXT: 1(0x8): 0x30000000 +# CHECK-NEXT: 2(0x10): 0x20000000 +# CHECK-NEXT: 3(0x18): 0x20000000 +# CHECK-NEXT: 4(0x20): 0x90000000 +# CHECK-NEXT: 5(0x28): 0x90000000 0x90000001 +# CHECK-NEXT: 6(0x34): 0x90000000 +# CHECK-NEXT: 7(0x3c): 0x30000001 +# CHECK-NEXT: 8(0x44): 0x20000001 +# CHECK-NEXT: 9(0x4c): 0x90000001