Index: ELF/SyntheticSections.h =================================================================== --- ELF/SyntheticSections.h +++ ELF/SyntheticSections.h @@ -668,7 +668,7 @@ struct NameTypeEntry { llvm::CachedHashStringRef Name; - uint8_t Type; + uint32_t Type; }; InputSection *DebugInfoSec; @@ -679,9 +679,9 @@ // The symbol type for the .gdb_index section. struct GdbSymbol { - uint32_t NameHash; - size_t NameOffset; - size_t CuVectorIndex; + llvm::CachedHashStringRef Name; + uint32_t OutputOff; + uint32_t CuVectorIdx; }; class GdbIndexSection final : public SyntheticSection { @@ -692,19 +692,12 @@ bool empty() const override; private: - void fixCuIndex(); - std::vector> createCuVectors(); - std::vector createGdbSymtab(); - // A symbol table for this .gdb_index section. - std::vector GdbSymtab; + std::vector Symbols; // CU vector is a part of constant pool area of section. std::vector> CuVectors; - // Symbol table contents. - llvm::DenseMap Symbols; - // Each chunk contains information gathered from a debug sections of single // object and used to build different areas of gdb index. std::vector Chunks; @@ -712,12 +705,13 @@ uint64_t CuListOffset = 24; uint64_t CuTypesOffset; uint64_t SymtabOffset; + uint64_t SymtabSize = 0; uint64_t ConstantPoolOffset; uint64_t CuVectorsPoolSize = 0; uint64_t StringPoolSize; uint64_t TotalSize; - std::vector CuVectorOffsets; + std::vector CuVectorOffsets; }; template GdbIndexSection *createGdbIndex(); Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -52,6 +52,7 @@ using namespace lld; using namespace lld::elf; +using llvm::support::endian::read32le; using llvm::support::endian::write32le; using llvm::support::endian::write64le; @@ -2311,19 +2312,17 @@ } static std::vector -readPubNamesAndTypes(DWARFContext &Dwarf) { +readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) { StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection(); StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection(); std::vector Ret; for (StringRef Sec : {Sec1, Sec2}) { DWARFDebugPubTable Table(Sec, Config->IsLE, true); - for (const DWARFDebugPubTable::Set &Set : Table.getData()) { - for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) { - CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name)); - Ret.push_back({S, Ent.Descriptor.toBits()}); - } - } + for (const DWARFDebugPubTable::Set &Set : Table.getData()) + for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) + Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)}, + (Ent.Descriptor.toBits() << 24) | Idx}); } return Ret; } @@ -2337,43 +2336,6 @@ return Ret; } -void GdbIndexSection::fixCuIndex() { - uint32_t Idx = 0; - for (GdbIndexChunk &Chunk : Chunks) { - for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas) - Ent.CuIndex += Idx; - Idx += Chunk.CompilationUnits.size(); - } -} - -std::vector> GdbIndexSection::createCuVectors() { - std::vector> Ret; - uint32_t Idx = 0; - uint32_t Off = 0; - - for (GdbIndexChunk &Chunk : Chunks) { - for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) { - GdbSymbol *&Sym = Symbols[Ent.Name]; - if (!Sym) { - Sym = make(GdbSymbol{Ent.Name.hash(), Off, Ret.size()}); - Off += Ent.Name.size() + 1; - Ret.push_back({}); - } - - // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains - // duplicate entries, so we want to dedup them. - std::vector &Vec = Ret[Sym->CuVectorIndex]; - uint32_t Val = (Ent.Type << 24) | Idx; - if (Vec.empty() || Vec.back() != Val) - Vec.push_back(Val); - } - Idx += Chunk.CompilationUnits.size(); - } - - StringPoolSize = Off; - return Ret; -} - template GdbIndexSection *elf::createGdbIndex() { // Gather debug info to create a .gdb_index section. std::vector Sections = getDebugInfoSections(); @@ -2386,7 +2348,7 @@ Chunks[I].DebugInfoSec = Sections[I]; Chunks[I].CompilationUnits = readCuList(Dwarf); Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]); - Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf); + Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I); }); // .debug_gnu_pub{names,types} are useless in executables. @@ -2414,37 +2376,48 @@ return Ret; } -std::vector GdbIndexSection::createGdbSymtab() { - uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3); - if (Size < 1024) - Size = 1024; - - uint32_t Mask = Size - 1; - std::vector Ret(Size); - - for (auto &KV : Symbols) { - GdbSymbol *Sym = KV.second; - uint32_t I = Sym->NameHash & Mask; - uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1; - - while (Ret[I]) - I = (I + Step) & Mask; - Ret[I] = Sym; - } - return Ret; +// Returns the desired size of an on-disk hash table for a .gdb_index section. +// There's a tradeoff between size and collision rate. We aim 75% utilization. +static size_t getSymtabSize(size_t NumSymbols) { + return std::max(NextPowerOf2(NumSymbols * 4 / 3), 1024); } GdbIndexSection::GdbIndexSection(std::vector &&C) : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), Chunks(std::move(C)) { - fixCuIndex(); - CuVectors = createCuVectors(); - GdbSymtab = createGdbSymtab(); + // A map to identify duplicate symbols. + DenseMap Map; + + // Initialize Symbols and CuVectors while deduplicating symbols by name. + for (GdbIndexChunk &Chunk : Chunks) { + for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) { + CachedHashStringRef S = Ent.Name; + size_t &Idx = Map[S]; + + if (!Idx) { + Idx = Symbols.size() + 1; + Symbols.push_back({S, static_cast(StringPoolSize), + static_cast(Symbols.size())}); + StringPoolSize += S.size() + 1; + CuVectors.push_back({}); + } + + // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains + // duplicate entries, so we want to dedup them. + std::vector &Vec = CuVectors[Symbols[Idx - 1].CuVectorIdx]; + if (Vec.empty() || Vec.back() != Ent.Type) + Vec.push_back(Ent.Type); + } + + // NamesAndTypes is useless beyond this point, so clear it to save memory. + Chunk.NamesAndTypes = {}; + } // Compute offsets early to know the section size. // Each chunk size needs to be in sync with what we write in writeTo. CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16; SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20; - ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8; + SymtabSize = getSymtabSize(Symbols.size()); + ConstantPoolOffset = SymtabOffset + SymtabSize * 8; for (ArrayRef Vec : CuVectors) { CuVectorOffsets.push_back(CuVectorsPoolSize); @@ -2480,25 +2453,35 @@ } // Write the address area. - for (GdbIndexChunk &D : Chunks) { - for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) { + uint32_t Idx = 0; + for (GdbIndexChunk &Chunk : Chunks) { + for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) { uint64_t BaseAddr = E.Section->getVA(0); write64le(Buf, BaseAddr + E.LowAddress); write64le(Buf + 8, BaseAddr + E.HighAddress); - write32le(Buf + 16, E.CuIndex); + write32le(Buf + 16, E.CuIndex + Idx); Buf += 20; } + Idx += Chunk.CompilationUnits.size(); } - // Write the symbol table. - for (GdbSymbol *Sym : GdbSymtab) { - if (Sym) { - write32le(Buf, CuVectorsPoolSize + Sym->NameOffset); - write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]); - } - Buf += 8; + // Write the on-disk open-addressing hash table containing symbols. + for (GdbSymbol &Sym : Symbols) { + uint32_t Mask = SymtabSize - 1; + uint32_t H = Sym.Name.hash(); + uint32_t I = H & Mask; + uint32_t Step = ((H * 17) & Mask) | 1; + + while (read32le(Buf + I * 8)) + I = (I + Step) & Mask; + + uint8_t *P = Buf + I * 8; + write32le(P, CuVectorsPoolSize + Sym.OutputOff); + write32le(P + 4, CuVectorOffsets[Sym.CuVectorIdx]); } + Buf += SymtabSize * 8; + // Write the CU vectors. for (ArrayRef Vec : CuVectors) { write32le(Buf, Vec.size()); @@ -2510,13 +2493,8 @@ } // Write the string pool. - for (auto &KV : Symbols) { - CachedHashStringRef S = KV.first; - GdbSymbol *Sym = KV.second; - size_t Off = Sym->NameOffset; - memcpy(Buf + Off, S.val().data(), S.size()); - Buf[Off + S.size()] = '\0'; - } + for (GdbSymbol &Sym : Symbols) + memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size()); } bool GdbIndexSection::empty() const { return !Out::DebugInfo; }