Index: ELF/OutputSections.h =================================================================== --- ELF/OutputSections.h +++ ELF/OutputSections.h @@ -16,11 +16,14 @@ #include "lld/Core/LLVM.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/ELF.h" +#include namespace lld { namespace elf { class SymbolBody; +class ParallelStringTableBuilder; +struct SectionPiece; struct EhSectionPiece; template class EhInputSection; template class InputSection; @@ -143,8 +146,11 @@ void finalizeTailMerge(); void finalizeNoTailMerge(); + ParallelStringTableBuilder *ParallelBuilder = nullptr; + llvm::StringTableBuilder Builder; std::vector *> Sections; + size_t StringAlignment; }; struct CieRecord { Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -21,6 +21,8 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SHA1.h" +#include +#include using namespace llvm; using namespace llvm::dwarf; @@ -466,14 +468,99 @@ } } +// This is a thread-safe string table builder. Internally it uses +// atomic variables to keep the inserted strings, their offsets, +// and the next string offset. +// +// The internal hash table is an open-addressing one. It doesn't +// support resizing. Once it becomes full, the behavior is undefined. +// +// Just like DenseMap, keys and values are directory stored to buckets +// as pairs. Originally, keys and values are all null. Keys are +// pointers to strings. Top 32 bits of values are string sizes, and +// bottom 32 bits are offsets in the string table. +class elf::ParallelStringTableBuilder { + struct EntryTy { + std::atomic Key; + std::atomic Val; + }; + + size_t align2(size_t Val) { return (Val + Alignment - 1) & ~(Alignment - 1); } + +public: + ParallelStringTableBuilder(size_t ExpectedNumItems, size_t Alignment) + : NextVal(0), Alignment(Alignment), + NumBuckets(std::max(PowerOf2Ceil(ExpectedNumItems * 5), 256)) { + Buckets = (EntryTy *)calloc(NumBuckets, sizeof(EntryTy)); + } + + ~ParallelStringTableBuilder() { free(Buckets); } + + // Inserts a given string to the string table and returns its offset. + uint32_t insert(CachedHashStringRef Str) { + size_t Start = Str.hash() & (NumBuckets - 1); + + for (size_t I = Start; I != Start - 1; I = (I + 1) & (NumBuckets - 1)) { + EntryTy &Bucket = Buckets[I]; + + // If the current bucket is empty, claim it. Since it's a new string, + // increment NextVal and returns its original value. + const char *Null = nullptr; + if (Bucket.Key.compare_exchange_strong(Null, Str.val().data())) { + size_t Offset = NextVal.fetch_add(align2(Str.size())); + Bucket.Val.store(((uint64_t)Str.size() << 32) | Offset); + return Offset; + } + + // The current bucket contains some string. Its size might not be + // written by other thread yet, so try loading until it becomes + // observable. + const char *OldKey = Bucket.Key.load(); + uint64_t OldVal; + while ((OldVal = Bucket.Val.load()) == 0) + ; + + // If the existing string is the same as the given key, + // we don't need to insert the key. If they are different, + // go to next bucket. + if (Str.val() == StringRef(OldKey, (OldVal >> 32))) + return OldVal & 0xFFFFFFFF; + } + assert("table is full"); + return 0; + } + + void writeTo(uint8_t *Buf) { + for (size_t I = 0; I < NumBuckets; ++I) { + if (const char *P = Buckets[I].Key.load()) { + uint64_t Val = Buckets[I].Val.load(); + size_t Size = Val >> 32; + size_t Off = Val & 0xFFFFFFFF; + memcpy(Buf + Off, P, Size); + } + } + } + + size_t size() const { return NextVal.load(); } + +private: + std::atomic NextVal; + size_t Alignment; + size_t NumBuckets; + EntryTy *Buckets; +}; + template MergeOutputSection::MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags, uintX_t Alignment) : OutputSectionBase(Name, Type, Flags), - Builder(StringTableBuilder::RAW, Alignment) {} + Builder(StringTableBuilder::RAW, Alignment), StringAlignment(Alignment) {} template void MergeOutputSection::writeTo(uint8_t *Buf) { - Builder.write(Buf); + if (ParallelBuilder) + ParallelBuilder->writeTo(Buf); + else + Builder.write(Buf); } template @@ -525,10 +612,25 @@ } template void MergeOutputSection::finalize() { - if (shouldTailMerge()) + if (shouldTailMerge()) { finalizeTailMerge(); - else - finalizeNoTailMerge(); + return; + } + + size_t NumPieces = 0; + for (MergeInputSection *Sec : Sections) + NumPieces += Sec->Pieces.size(); + ParallelBuilder = + new ParallelStringTableBuilder(NumPieces / 10, StringAlignment); + + parallel_for_each( + Sections.begin(), Sections.end(), [&](MergeInputSection *Sec) { + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].OutputOff = ParallelBuilder->insert(Sec->getData(I)); + }); + + this->Size = ParallelBuilder->size(); } template