Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -2366,27 +2366,55 @@ typedef GdbIndexSection::GdbSymbol GdbSymbol; typedef GdbIndexSection::NameTypeEntry NameTypeEntry; - // A map to uniquify symbols by name. - DenseMap Map; + // The number of symbols we will handle in this function is of the order + // of millions for very large executables, so we use multi-threading to + // speed it up. + size_t NumShards = 16; + size_t Concurrency = 1; + if (ThreadsEnabled) + Concurrency = + std::min(PowerOf2Floor(hardware_concurrency()), NumShards); + + // A sharded map to uniquify symbols by name. + std::vector> Map(NumShards); // Instantiate GdbSymbols while uniqufying them by name. - std::vector Ret; - for (ArrayRef Entries : NameTypes) { - for (const NameTypeEntry &Ent : Entries) { - size_t &Idx = Map[Ent.Name]; - if (Idx) { - // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains - // duplicate entries, so we want to dedup them. - std::vector &Vec = Ret[Idx - 1].CuVector; - if (Vec.empty() || Vec.back() != Ent.Type) - Vec.push_back(Ent.Type); - continue; - } + std::vector> Symbols(NumShards); + parallelForEachN(0, Concurrency, [&](size_t ThreadId) { + for (ArrayRef Entries : NameTypes) { + for (const NameTypeEntry &Ent : Entries) { + CachedHashStringRef S = Ent.Name; + size_t ShardId = S.hash() >> (32 - countTrailingZeros(NumShards)); + if ((ShardId & (Concurrency - 1)) != ThreadId) + continue; - Idx = Ret.size() + 1; - Ret.push_back({Ent.Name, {Ent.Type}, 0, 0}); + size_t &Idx = Map[ShardId][Ent.Name]; + if (Idx) { + // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains + // duplicate entries, so we want to dedup them. + std::vector &Vec = Symbols[ShardId][Idx - 1].CuVector; + if (Vec.empty() || Vec.back() != Ent.Type) + Vec.push_back(Ent.Type); + continue; + } + + Idx = Symbols[ShardId].size() + 1; + Symbols[ShardId].push_back({Ent.Name, {Ent.Type}, 0, 0}); + } } - } + }); + + size_t NumSymbols = 0; + for (ArrayRef V : Symbols) + NumSymbols += V.size(); + + // The return type is a flattened vector, so we'll copy each vector + // contents to Ret. + std::vector Ret; + Ret.reserve(NumSymbols); + for (std::vector &Vec : Symbols) + for (GdbSymbol &Sym : Vec) + Ret.push_back(std::move(Sym)); // CU vectors and symbol names are adjacent in the output file. // We can compute their offsets in the output file now. Index: lld/test/ELF/gdb-index.s =================================================================== --- lld/test/ELF/gdb-index.s +++ lld/test/ELF/gdb-index.s @@ -34,16 +34,16 @@ # DWARF-NEXT: Low/High address = [0x201000, 0x201001) (Size: 0x1), CU id = 0 # DWARF-NEXT: Low/High address = [0x201004, 0x201006) (Size: 0x2), CU id = 1 # DWARF: Symbol table offset = 0x60, size = 1024, filled slots: -# DWARF-NEXT: 512: Name offset = 0x2b, CU vector offset = 0x14 -# DWARF-NEXT: String name: aaaaaaaaaaaaaaaa, CU vector index: 2 -# DWARF-NEXT: 754: Name offset = 0x27, CU vector offset = 0x8 -# DWARF-NEXT: String name: int, CU vector index: 1 -# DWARF-NEXT: 822: Name offset = 0x1c, CU vector offset = 0x0 -# DWARF-NEXT: String name: entrypoint, CU vector index: 0 +# DWARF-NEXT: 512: Name offset = 0x1c, CU vector offset = 0x0 +# DWARF-NEXT: String name: aaaaaaaaaaaaaaaa, CU vector index: 0 +# DWARF-NEXT: 754: Name offset = 0x38, CU vector offset = 0x10 +# DWARF-NEXT: String name: int, CU vector index: 2 +# DWARF-NEXT: 822: Name offset = 0x2d, CU vector offset = 0x8 +# DWARF-NEXT: String name: entrypoint, CU vector index: 1 # DWARF: Constant pool offset = 0x2060, has 3 CU vectors: -# DWARF-NEXT: 0(0x0): 0x30000000 -# DWARF-NEXT: 1(0x8): 0x90000000 0x90000001 -# DWARF-NEXT: 2(0x14): 0x30000001 +# DWARF-NEXT: 0(0x0): 0x30000001 +# DWARF-NEXT: 1(0x8): 0x30000000 +# DWARF-NEXT: 2(0x10): 0x90000000 0x90000001 # SECTION-NOT: debug_gnu_pubnames