Index: lld/trunk/ELF/GdbIndex.h =================================================================== --- lld/trunk/ELF/GdbIndex.h +++ lld/trunk/ELF/GdbIndex.h @@ -47,6 +47,10 @@ // parsed CU. std::vector> readAddressArea(size_t CurrentCU); + // Method extracts public names and types. It returns list of name and + // gnu_pub* kind pairs. + std::vector> readPubNamesAndTypes(); + private: // Method returns section file offset as a load addres for DWARF parser. That // allows to find the target section index for address ranges. @@ -55,6 +59,40 @@ std::unique_ptr clone() const override; }; +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + size_t CuVectorIndex; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name. +class GdbHashTab final { +public: + std::pair add(uint32_t Hash, size_t Offset); + + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector Table; + + // Size keeps the amount of filled entries in Table. + size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; + } // namespace elf } // namespace lld Index: lld/trunk/ELF/GdbIndex.cpp =================================================================== --- lld/trunk/ELF/GdbIndex.cpp +++ lld/trunk/ELF/GdbIndex.cpp @@ -56,11 +56,6 @@ // .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the // hashtable in according to .gdb_index format specification. // 6) Constant pool is populated at the same time as symbol table. -// -// Current version implements steps 1-4. So it writes .gdb_index -// header, list of compilation units and address area. Since we so not plan to -// support types CU list area, it is also empty and so far is "implemented". -// Other data areas are not yet implemented. //===----------------------------------------------------------------------===// #include "GdbIndex.h" @@ -92,6 +87,80 @@ } template +std::vector> +GdbIndexBuilder::readPubNamesAndTypes() { + const bool IsLE = ELFT::TargetEndianness == llvm::support::little; + StringRef Data[] = {Dwarf->getGnuPubNamesSection(), + Dwarf->getGnuPubTypesSection()}; + + std::vector> Ret; + for (StringRef D : Data) { + DataExtractor PubNames(D, IsLE, 0); + uint32_t Offset = 0; + while (PubNames.isValidOffset(Offset)) { + // Skip length, version, unit offset and size. + Offset += 14; + while (Offset < D.size()) { + uint32_t DieRef = PubNames.getU32(&Offset); + if (DieRef == 0) + break; + uint8_t Flags = PubNames.getU8(&Offset); + const char *Name = PubNames.getCStr(&Offset); + Ret.push_back({Name, Flags}); + } + } + } + + return Ret; +} + +std::pair GdbHashTab::add(uint32_t Hash, size_t Offset) { + if (Size * 4 / 3 >= Table.size()) + expand(); + + GdbSymbol **Slot = findSlot(Hash, Offset); + bool New = false; + if (*Slot == nullptr) { + ++Size; + *Slot = new (Alloc) GdbSymbol(Hash, Offset); + New = true; + } + return {New, *Slot}; +} + +void GdbHashTab::expand() { + if (Table.empty()) { + Table.resize(InitialSize); + return; + } + std::vector NewTable(Table.size() * 2); + NewTable.swap(Table); + + for (GdbSymbol *Sym : NewTable) { + if (!Sym) + continue; + GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); + *Slot = Sym; + } +} + +// Methods finds a slot for symbol with given hash. The step size used to find +// the next candidate slot when handling a hash collision is specified in +// .gdb_index section format. The hash value for a table entry is computed by +// applying an iterative hash function to the symbol's name. +GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { + uint32_t Index = Hash & (Table.size() - 1); + uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; + + for (;;) { + GdbSymbol *S = Table[Index]; + if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) + return &Table[Index]; + Index = (Index + Step) & (Table.size() - 1); + } +} + +template static InputSectionBase * findSection(ArrayRef *> Arr, uint64_t Offset) { for (InputSectionBase *S : Arr) Index: lld/trunk/ELF/SyntheticSections.h =================================================================== --- lld/trunk/ELF/SyntheticSections.h +++ lld/trunk/ELF/SyntheticSections.h @@ -13,6 +13,7 @@ #include "GdbIndex.h" #include "InputSection.h" #include "llvm/ADT/MapVector.h" +#include "llvm/MC/StringTableBuilder.h" namespace lld { namespace elf { @@ -485,6 +486,13 @@ // Pairs of [CU Offset, CU length]. std::vector> CompilationUnits; + llvm::StringTableBuilder StringPool; + + GdbHashTab SymbolTable; + + // The CU vector portion of the constant pool. + std::vector>> CuVectors; + std::vector> AddressArea; private: @@ -493,6 +501,11 @@ uint32_t CuTypesOffset; uint32_t SymTabOffset; + uint32_t ConstantPoolOffset; + uint32_t StringPoolOffset; + + size_t CuVectorsSize = 0; + std::vector CuVectorsOffset; bool Finalized = false; }; Index: lld/trunk/ELF/SyntheticSections.cpp =================================================================== --- lld/trunk/ELF/SyntheticSections.cpp +++ lld/trunk/ELF/SyntheticSections.cpp @@ -1457,7 +1457,8 @@ template GdbIndexSection::GdbIndexSection() - : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index") {} + : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), + StringPool(llvm::StringTableBuilder::ELF) {} template void GdbIndexSection::parseDebugSections() { for (InputSectionBase *S : Symtab::X->Sections) @@ -1466,6 +1467,16 @@ readDwarf(IS); } +// Iterative hash function for symbol's name is described in .gdb_index format +// specification. Note that we use one for version 5 to 7 here, it is different +// for version 4. +static uint32_t hash(StringRef Str) { + uint32_t R = 0; + for (uint8_t C : Str) + R = R * 67 + tolower(C) - 113; + return R; +} + template void GdbIndexSection::readDwarf(InputSection *I) { GdbIndexBuilder Builder(I); @@ -1478,6 +1489,27 @@ std::vector> AddrArea = Builder.readAddressArea(CuId); AddressArea.insert(AddressArea.end(), AddrArea.begin(), AddrArea.end()); + + std::vector> NamesAndTypes = + Builder.readPubNamesAndTypes(); + + for (std::pair &Pair : NamesAndTypes) { + uint32_t Hash = hash(Pair.first); + size_t Offset = StringPool.add(Pair.first); + + bool IsNew; + GdbSymbol *Sym; + std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset); + if (IsNew) { + Sym->CuVectorIndex = CuVectors.size(); + CuVectors.push_back({{CuId, Pair.second}}); + continue; + } + + std::vector> &CuVec = + CuVectors[Sym->CuVectorIndex]; + CuVec.push_back({CuId, Pair.second}); + } } template void GdbIndexSection::finalize() { @@ -1491,20 +1523,31 @@ // and 5 more fields with different kinds of offsets. CuTypesOffset = CuListOffset + CompilationUnits.size() * CompilationUnitSize; SymTabOffset = CuTypesOffset + AddressArea.size() * AddressEntrySize; + + ConstantPoolOffset = + SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; + + for (std::vector> &CuVec : CuVectors) { + CuVectorsOffset.push_back(CuVectorsSize); + CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); + } + StringPoolOffset = ConstantPoolOffset + CuVectorsSize; + + StringPool.finalizeInOrder(); } template size_t GdbIndexSection::getSize() const { const_cast *>(this)->finalize(); - return SymTabOffset; + return StringPoolOffset + StringPool.getSize(); } template void GdbIndexSection::writeTo(uint8_t *Buf) { - write32le(Buf, 7); // Write version. - write32le(Buf + 4, CuListOffset); // CU list offset. - write32le(Buf + 8, CuTypesOffset); // Types CU list offset. - write32le(Buf + 12, CuTypesOffset); // Address area offset. - write32le(Buf + 16, SymTabOffset); // Symbol table offset. - write32le(Buf + 20, SymTabOffset); // Constant pool offset. + write32le(Buf, 7); // Write version. + write32le(Buf + 4, CuListOffset); // CU list offset. + write32le(Buf + 8, CuTypesOffset); // Types CU list offset. + write32le(Buf + 12, CuTypesOffset); // Address area offset. + write32le(Buf + 16, SymTabOffset); // Symbol table offset. + write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset. Buf += 24; // Write the CU list. @@ -1522,6 +1565,34 @@ write32le(Buf + 16, E.CuIndex); Buf += 20; } + + // Write the symbol table. + for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) { + GdbSymbol *Sym = SymbolTable.getSymbol(I); + if (Sym) { + size_t NameOffset = + Sym->NameOffset + StringPoolOffset - ConstantPoolOffset; + size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex]; + write32le(Buf, NameOffset); + write32le(Buf + 4, CuVectorOffset); + } + Buf += 8; + } + + // Write the CU vectors into the constant pool. + for (std::vector> &CuVec : CuVectors) { + write32le(Buf, CuVec.size()); + Buf += 4; + for (std::pair &P : CuVec) { + uint32_t Index = P.first; + uint8_t Flags = P.second; + Index |= Flags << 24; + write32le(Buf, Index); + Buf += 4; + } + } + + StringPool.write(Buf); } template bool GdbIndexSection::empty() const { Index: lld/trunk/test/ELF/gdb-index.s =================================================================== --- lld/trunk/test/ELF/gdb-index.s +++ lld/trunk/test/ELF/gdb-index.s @@ -36,5 +36,14 @@ # CHECK: Address area offset = 0x38, has 2 entries: # CHECK-NEXT: Low address = 0x201000, High address = 0x20100b, CU index = 0 # CHECK-NEXT: Low address = 0x20100b, High address = 0x201016, CU index = 1 -# CHECK: Symbol table offset = 0x60, size = 0, filled slots: -# CHECK: Constant pool offset = 0x60, has 0 CU vectors: +# CHECK: Symbol table offset = 0x60, size = 1024, filled slots: +# CHECK-NEXT: 489: Name offset = 0x1d, CU vector offset = 0x0 +# CHECK-NEXT: String name: main, CU vector index: 0 +# CHECK-NEXT: 754: Name offset = 0x22, CU vector offset = 0x8 +# CHECK-NEXT: String name: int, CU vector index: 1 +# CHECK-NEXT: 956: Name offset = 0x26, CU vector offset = 0x14 +# CHECK-NEXT: String name: main2, CU vector index: 2 +# CHECK: Constant pool offset = 0x2060, has 3 CU vectors: +# CHECK-NEXT: 0(0x0): 0x30000000 +# CHECK-NEXT: 1(0x8): 0x90000000 0x90000001 +# CHECK-NEXT: 2(0x14): 0x30000001