Index: ELF/GdbIndex.h =================================================================== --- ELF/GdbIndex.h +++ ELF/GdbIndex.h @@ -12,15 +12,61 @@ #include "InputFiles.h" #include "llvm/Object/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" namespace lld { namespace elf { template class InputSection; -template -std::vector> -readCuList(InputSection *Sec); +template class GdbIndexBuilder { + typedef typename ELFT::uint uintX_t; + + InputSection *DebugInfoSec; + + std::unique_ptr Dwarf; + +public: + GdbIndexBuilder(InputSection *DebugInfoSec); + + std::vector> readCuList(); + + std::vector> readPubNamesAndTypes(); +}; + +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + size_t CuVectorIndex; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name. +class GdbHashTab final { +public: + std::pair add(uint32_t Hash, size_t Offset); + + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector Table; + + // Size keeps the amount of filled entries in Table. + size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; } // namespace elf } // namespace lld Index: ELF/GdbIndex.cpp =================================================================== --- ELF/GdbIndex.cpp +++ ELF/GdbIndex.cpp @@ -57,34 +57,35 @@ // hashtable in according to .gdb_index format specification. // 6) Constant pool is populated at the same time as symbol table. // -// Current version of implementation has 1, 2, 3 steps. So it writes .gdb_index -// header and list of compilation units. Since we so not plan to support types -// CU list area, it is also empty and so far is "implemented". -// Other data areas are not yet implemented. +// Current version of implementation has 1, 2, 3, 5, 6 steps. So it writes +// .gdb_index header and list of compilation units. Also it builds and writes +// symbol table and constant pool. +// Since we do not plan to support types CU list area, it is also empty and +// so far is "implemented". Address area is the only not implemented part. //===----------------------------------------------------------------------===// #include "GdbIndex.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ELFObjectFile.h" using namespace llvm; +using namespace llvm::dwarf; using namespace llvm::object; +using namespace lld::elf; template -std::vector> -lld::elf::readCuList(InputSection *DebugInfoSec) { - typedef typename ELFT::uint uintX_t; - - std::unique_ptr Dwarf; +GdbIndexBuilder::GdbIndexBuilder(InputSection *DebugInfoSec) + : DebugInfoSec(DebugInfoSec) { if (Expected> Obj = object::ObjectFile::createObjectFile(DebugInfoSec->getFile()->MB)) Dwarf.reset(new DWARFContextInMemory(*Obj.get())); - - if (!Dwarf) { + else error(toString(DebugInfoSec->getFile()) + ": error creating DWARF context"); - return {}; - } +} +template +std::vector> +GdbIndexBuilder::readCuList() { std::vector> Ret; for (std::unique_ptr &CU : Dwarf->compile_units()) Ret.push_back( @@ -92,11 +93,85 @@ return Ret; } -template std::vector> -lld::elf::readCuList(InputSection *); -template std::vector> -lld::elf::readCuList(InputSection *); -template std::vector> -lld::elf::readCuList(InputSection *); -template std::vector> -lld::elf::readCuList(InputSection *); +template +std::vector> +GdbIndexBuilder::readPubNamesAndTypes() { + const bool IsLE = ELFT::TargetEndianness == llvm::support::little; + StringRef Data[] = {Dwarf->getGnuPubNamesSection(), + Dwarf->getGnuPubTypesSection()}; + + std::vector> Ret; + for (StringRef D : Data) { + DataExtractor PubNames(D, IsLE, 0); + uint32_t Offset = 0; + while (PubNames.isValidOffset(Offset)) { + // Skip length, version, unit offset and size. + Offset += 14; + while (Offset < D.size()) { + uint32_t DieRef = PubNames.getU32(&Offset); + if (DieRef == 0) + break; + uint8_t Flags = PubNames.getU8(&Offset); + const char *Name = PubNames.getCStr(&Offset); + Ret.push_back({Name, Flags}); + } + } + } + + return Ret; +} + +std::pair GdbHashTab::add(uint32_t Hash, size_t Offset) { + if (Size * 4 / 3 >= Table.size()) + expand(); + + GdbSymbol **Slot = findSlot(Hash, Offset); + bool New = false; + if (*Slot == nullptr) { + ++Size; + *Slot = new (Alloc) GdbSymbol(Hash, Offset); + New = true; + } + return {New, *Slot}; +} + +void GdbHashTab::expand() { + if (Table.empty()) { + Table.resize(InitialSize); + return; + } + std::vector NewTable(Table.size() * 2); + NewTable.swap(Table); + + for (GdbSymbol *Sym : NewTable) { + if (!Sym) + continue; + GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); + *Slot = Sym; + } +} + +// Methods finds a slot for symbol with given hash. +// The step size used to find the next candidate +// slot when handling a hash collision is specified in .gdb_index section format +// (https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html) +GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { + uint32_t Index = Hash & (Table.size() - 1); + uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; + + for (;;) { + GdbSymbol *S = Table[Index]; + if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) + return &Table[Index]; + Index = (Index + Step) & (Table.size() - 1); + } +} + +namespace lld { +namespace elf { +template class GdbIndexBuilder; +template class GdbIndexBuilder; +template class GdbIndexBuilder; +template class GdbIndexBuilder; +} +} Index: ELF/SyntheticSections.h =================================================================== --- ELF/SyntheticSections.h +++ ELF/SyntheticSections.h @@ -13,6 +13,7 @@ #include "GdbIndex.h" #include "InputSection.h" #include "llvm/ADT/MapVector.h" +#include "llvm/MC/StringTableBuilder.h" namespace lld { namespace elf { @@ -445,17 +446,32 @@ GdbIndexSection(); void finalize() override; void writeTo(uint8_t *Buf) override; - size_t getSize() const override { return CuTypesOffset; } + size_t getSize() const override; bool empty() const override; // Pairs of [CU Offset, CU length]. std::vector> CompilationUnits; + llvm::StringTableBuilder StringPool; + + GdbHashTab SymbolTable; + + // The CU vector portion of the constant pool. + std::vector>> CuVectors; + private: void parseDebugSections(); void readDwarf(InputSection *I); uint32_t CuTypesOffset; + uint32_t SymTabOffset; + uint32_t ConstantPoolOffset; + uint32_t StringPoolOffset; + + size_t CuVectorsSize = 0; + std::vector CuVectorsOffset; + + bool Finalized = false; }; // --eh-frame-hdr option tells linker to construct a header for all the Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -1402,37 +1402,93 @@ template GdbIndexSection::GdbIndexSection() - : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index") {} + : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), + StringPool(llvm::StringTableBuilder::ELF) {} template void GdbIndexSection::parseDebugSections() { - std::vector *> &IS = - static_cast *>(Out::DebugInfo)->Sections; + for (InputSectionBase *S : Symtab::X->Sections) + if (InputSection *IS = dyn_cast>(S)) + if (IS->OutSec && IS->Name == ".debug_info") + readDwarf(IS); +} - for (InputSection *I : IS) - readDwarf(I); +// Iterative hash function for symbol's name is described at +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html +static uint32_t hash(StringRef Str) { + uint32_t R = 0; + for (uint8_t C : Str) + R = R * 67 + tolower(C) - 113; + return R; } template void GdbIndexSection::readDwarf(InputSection *I) { - std::vector> CuList = readCuList(I); + GdbIndexBuilder Builder(I); + if (ErrorCount) + return; + + size_t CuId = CompilationUnits.size(); + std::vector> CuList = Builder.readCuList(); CompilationUnits.insert(CompilationUnits.end(), CuList.begin(), CuList.end()); + + std::vector> NamesAndTypes = + Builder.readPubNamesAndTypes(); + + for (std::pair &Pair : NamesAndTypes) { + uint32_t Hash = hash(Pair.first); + size_t Offset = StringPool.add(Pair.first); + + bool IsNew; + GdbSymbol *Sym; + std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset); + if (IsNew) { + Sym->CuVectorIndex = CuVectors.size(); + CuVectors.push_back({{CuId, Pair.second}}); + continue; + } + + std::vector> &CuVec = + CuVectors[Sym->CuVectorIndex]; + CuVec.push_back({CuId, Pair.second}); + } } template void GdbIndexSection::finalize() { + if (Finalized) + return; + parseDebugSections(); // GdbIndex header consist from version fields // and 5 more fields with different kinds of offsets. CuTypesOffset = CuListOffset + CompilationUnits.size() * CompilationUnitSize; + + SymTabOffset = CuTypesOffset; + ConstantPoolOffset = + SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; + + for (std::vector> &CuVec : CuVectors) { + CuVectorsOffset.push_back(CuVectorsSize); + CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); + } + StringPoolOffset = ConstantPoolOffset + CuVectorsSize; + + StringPool.finalizeInOrder(); + Finalized = true; +} + +template size_t GdbIndexSection::getSize() const { + const_cast *>(this)->finalize(); + return StringPoolOffset + StringPool.getSize(); } template void GdbIndexSection::writeTo(uint8_t *Buf) { - write32le(Buf, 7); // Write Version - write32le(Buf + 4, CuListOffset); // CU list offset - write32le(Buf + 8, CuTypesOffset); // Types CU list offset - write32le(Buf + 12, CuTypesOffset); // Address area offset - write32le(Buf + 16, CuTypesOffset); // Symbol table offset - write32le(Buf + 20, CuTypesOffset); // Constant pool offset + write32le(Buf, 7); // Write Version + write32le(Buf + 4, CuListOffset); // CU list offset + write32le(Buf + 8, CuTypesOffset); // Types CU list offset + write32le(Buf + 12, CuTypesOffset); // Address area offset + write32le(Buf + 16, SymTabOffset); // Symbol table offset + write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset Buf += 24; // Write the CU list. @@ -1441,6 +1497,34 @@ write64le(Buf + 8, CU.second); Buf += 16; } + + // Write the symbol table. + for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) { + GdbSymbol *Sym = SymbolTable.getSymbol(I); + if (Sym) { + size_t NameOffset = + Sym->NameOffset + StringPoolOffset - ConstantPoolOffset; + size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex]; + write32le(Buf, NameOffset); + write32le(Buf + 4, CuVectorOffset); + } + Buf += 8; + } + + // Write the CU vectors into the constant pool. + for (std::vector> &CuVec : CuVectors) { + write32le(Buf, CuVec.size()); + Buf += 4; + for (std::pair &P : CuVec) { + uint32_t Index = P.first; + uint8_t Flags = P.second; + Index |= Flags << 24; + write32le(Buf, Index); + Buf += 4; + } + } + + StringPool.write(Buf); } template bool GdbIndexSection::empty() const { Index: test/ELF/gdb-index.s =================================================================== --- test/ELF/gdb-index.s +++ test/ELF/gdb-index.s @@ -29,10 +29,19 @@ # DISASM-CHECK: 11015: c3 retq # CHECK: .gnu_index contents: -# CHECK-NEXT: Version = 7 -# CHECK: CU list offset = 0x18, has 2 entries: -# CHECK-NEXT: 0: Offset = 0x0, Length = 0x34 -# CHECK-NEXT: 1: Offset = 0x34, Length = 0x34 -# CHECK: Address area offset = 0x38, has 0 entries: -# CHECK: Symbol table offset = 0x38, size = 0, filled slots: -# CHECK: Constant pool offset = 0x38, has 0 CU vectors: +# CHECK-NEXT: Version = 7 +# CHECK: CU list offset = 0x18, has 2 entries: +# CHECK-NEXT: 0: Offset = 0x0, Length = 0x34 +# CHECK-NEXT: 1: Offset = 0x34, Length = 0x34 +# CHECK: Address area offset = 0x38, has 0 entries: +# CHECK: Symbol table offset = 0x38, size = 1024, filled slots: +# CHECK-NEXT: 489: Name offset = 0x1d, CU vector offset = 0x0 +# CHECK-NEXT: String name: main, CU vector index: 0 +# CHECK-NEXT: 754: Name offset = 0x22, CU vector offset = 0x8 +# CHECK-NEXT: String name: int, CU vector index: 1 +# CHECK-NEXT: 956: Name offset = 0x26, CU vector offset = 0x14 +# CHECK-NEXT: String name: main2, CU vector index: 2 +# CHECK: Constant pool offset = 0x2038, has 3 CU vectors: +# CHECK-NEXT: 0(0x0): 0x30000000 +# CHECK-NEXT: 1(0x8): 0x90000000 0x90000001 +# CHECK-NEXT: 2(0x14): 0x30000001