Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -160,14 +160,16 @@ // be found by looking at the next one) and put the hash in a side table. struct SectionPiece { SectionPiece(size_t Off, bool Live = false) - : InputOff(Off), OutputOff(-1), Live(Live || !Config->GcSections) {} - - size_t InputOff; - ssize_t OutputOff : 8 * sizeof(ssize_t) - 1; - size_t Live : 1; + : InputOff(Off), Live(Live || !Config->GcSections), OutputOff(-1) {} + + uint64_t InputOff : 40; + uint64_t Live : 1; + union { + int64_t OutputOff; + SectionPiece *Next; + }; }; -static_assert(sizeof(SectionPiece) == 2 * sizeof(size_t), - "SectionPiece is too big"); +static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big"); // This corresponds to a SHF_MERGE section of an input file. template class MergeInputSection : public InputSectionBase { Index: ELF/OutputSections.h =================================================================== --- ELF/OutputSections.h +++ ELF/OutputSections.h @@ -21,6 +21,7 @@ namespace elf { class SymbolBody; +class ParallelStringTableBuilder; struct EhSectionPiece; template class EhInputSection; template class InputSection; @@ -143,8 +144,11 @@ void finalizeTailMerge(); void finalizeNoTailMerge(); + ParallelStringTableBuilder *ParallelBuilder = nullptr; + llvm::StringTableBuilder Builder; std::vector *> Sections; + size_t StringAlignment; }; struct CieRecord { Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SHA1.h" +#include using namespace llvm; using namespace llvm::dwarf; @@ -466,14 +467,226 @@ } } +// This is a thread-safe string table builder. Internally it uses +// atomic variables to keep the inserted strings and associated +// SectionPieces. +// +// The internal hash table is an open-addressing one. It doesn't +// support resizing. Once it becomes full, the behavior is undefined. +// +// Just like DenseMap, keys and values are directory stored to buckets +// as pairs. Originally, keys and values are all null. Keys are +// pointers to strings. +// +// SectionPieces are managed using singly linked list. If a bucket +// have a value, it has a pointer to a SectionPiece. Other +// SectionPieces having the same string key can be followed by +// `Next` pointers of SectionPieces. +// +// Once you added all strings, you need to call finalize() to fix +// string table contents. When finalize() is called, hash table +// contents is nondeterministic because no one knows which thread +// have claimed earlier buckets in the hash table. +// +// Thus, the next step is to make it deterministic. For a streak of +// buckets having some values, we sort them. No matter what order +// multiple threads claim buckets, claimed buckets are claimed, and +// unclaimed buckets remain unclaimed. Therefore, by sortint buckets +// for each streak, we can make the entire hash table deterministic. +// +// Finally, we assign offsets to all SectionPieces associated with +// this hash table. +class elf::ParallelStringTableBuilder { + // Bucket entry type. + struct EntryTy { + // String key information. String key is guaranteed to be unique + // in a hash table. + std::atomic Str; + std::atomic Size; + + // Offset in the string table. Filled by finalize(). + uint32_t Offset; + + // SectionPieces having the same string contents are chained + // using pointers. This pointer points to the first node. + std::atomic Piece; + + EntryTy(const EntryTy &Other) { memcpy(this, &Other, sizeof(EntryTy)); } + + EntryTy &operator=(const EntryTy &Other) { + memcpy(this, &Other, sizeof(EntryTy)); + return *this; + } + + // Define a total order for string keys. The detail is not important, + // but it needs to be deterministic, so that we can get deterministic + // output from the hash table. + bool operator<(const EntryTy &Other) const { + size_t SizeA = Size.load(); + size_t SizeB = Other.Size.load(); + if (SizeA != SizeB) + return SizeA < SizeB; + return memcmp(Str.load(), Other.Str.load(), SizeA) < 0; + }; + }; + + static_assert(sizeof(EntryTy) == 24, "EntryTy is too big"); + + size_t align2(size_t Val) { return (Val + Alignment - 1) & ~(Alignment - 1); } + +public: + ParallelStringTableBuilder(size_t Size, size_t Alignment) + : Alignment(Alignment), + NumBuckets(std::max(PowerOf2Ceil(Size), 16)) { + Buckets = (EntryTy *)calloc(NumBuckets, sizeof(EntryTy)); + } + + ~ParallelStringTableBuilder() { free(Buckets); } + + // Inserts a given section piece to the string table. + void insert(SectionPiece &Piece, CachedHashStringRef Str) { + assert(Str.size() != 0); + + size_t Start = Str.hash() & (NumBuckets - 1); + + for (size_t I = Start; I != Start - 1; I = (I + 1) & (NumBuckets - 1)) { + EntryTy &Bucket = Buckets[I]; + + // If the current bucket is empty, claim it. + const char *Null = nullptr; + if (Bucket.Str.compare_exchange_strong(Null, Str.val().data())) { + Bucket.Size.store(Str.size()); + append(Bucket, Piece); + return; + } + + // The current bucket contains some string. Its size might not be + // written by other thread yet, so try loading until it becomes + // observable. + const char *OldStr = Bucket.Str.load(); + uint64_t OldSize = Bucket.Size.load(); + while (OldSize == 0) + OldSize = Bucket.Size.load(); + + // If there is an existing key, append Piece to the bucket. + // Otherwise, go to next bucket. + if (Str.val() == StringRef(OldStr, OldSize)) { + append(Bucket, Piece); + return; + } + } + + error("table is full"); + } + + void finalize() { + // Sort buckets to make the hash table contents deterministic. + sortBuckets(); + sortWrapAround(); + + // Set an offset in the string table for each bucket. + size_t Off = 0; + for (size_t I = 0; I < NumBuckets; ++I) { + if (size_t Size = Buckets[I].Size.load()) { + Off = align2(Off); + Buckets[I].Offset = Off; + Off += Buckets[I].Size.load(); + } + } + StringTableSize = Off; + + // Update SectionPieces' offsets. + parallel_for(size_t(0), NumBuckets, [&](size_t I) { + if (SectionPiece *Cur = Buckets[I].Piece.load()) { + for (SectionPiece *Next = Cur->Next; Next; + Cur = Next, Next = Next->Next) + Cur->OutputOff = Buckets[I].Offset; + Cur->OutputOff = Buckets[I].Offset; + } + }); + } + + void writeTo(uint8_t *Buf) { + for (size_t I = 0; I < NumBuckets; ++I) { + if (const char *Str = Buckets[I].Str.load()) { + size_t Size = Buckets[I].Size.load(); + memcpy(Buf + Buckets[I].Offset, Str, Size); + } + } + } + + size_t size() const { return StringTableSize; } + +private: + // Atomically append Piece to Bucket. + void append(EntryTy &Bucket, SectionPiece &Piece) { + for (;;) { + Piece.Next = Bucket.Piece.load(); + if (Bucket.Piece.compare_exchange_strong(Piece.Next, &Piece)) + return; + } + } + + // Find runs of buckets having some values and sort them. + void sortBuckets() { + for (size_t I = 0; I < NumBuckets;) { + if (Buckets[I].Size.load() == 0) { + ++I; + continue; + } + size_t Begin = I; + size_t End = I + 1; + + while (End < NumBuckets && Buckets[End].Size.load()) + ++End; + std::sort(Buckets + Begin, Buckets + End); + I = End; + } + } + + // After the end of the buckets, it wraps around to the beginning, + // so we sort them as one unit. sortBuckets() didn't handle this + // corner case. + void sortWrapAround() { + if (Buckets[0].Size.load() == 0 || Buckets[NumBuckets - 1].Size.load() == 0) + return; + + size_t First = 0; + while (Buckets[First].Size.load()) + ++First; + + size_t Last = NumBuckets - 1; + while (Buckets[Last - 1].Size.load()) + --Last; + + std::vector Vec; + Vec.insert(Vec.end(), Buckets, Buckets + First); + Vec.insert(Vec.end(), Buckets + Last, Buckets + NumBuckets); + + std::sort(Vec.begin(), Vec.end()); + + size_t LastSize = NumBuckets - Last; + std::copy(Vec.begin(), Vec.begin() + LastSize, Buckets + Last); + std::copy(Vec.begin() + LastSize, Vec.end(), Buckets); + } + + size_t StringTableSize = 0; + size_t Alignment; + size_t NumBuckets; + EntryTy *Buckets; +}; + template MergeOutputSection::MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags, uintX_t Alignment) : OutputSectionBase(Name, Type, Flags), - Builder(StringTableBuilder::RAW, Alignment) {} + Builder(StringTableBuilder::RAW, Alignment), StringAlignment(Alignment) {} template void MergeOutputSection::writeTo(uint8_t *Buf) { - Builder.write(Buf); + if (ParallelBuilder) + ParallelBuilder->writeTo(Buf); + else + Builder.write(Buf); } template @@ -525,10 +738,27 @@ } template void MergeOutputSection::finalize() { - if (shouldTailMerge()) + if (shouldTailMerge()) { finalizeTailMerge(); - else - finalizeNoTailMerge(); + return; + } + + size_t NumPieces = 0; + for (MergeInputSection *Sec : Sections) + NumPieces += Sec->Pieces.size(); + ParallelBuilder = + new ParallelStringTableBuilder(NumPieces / 2, StringAlignment); + + parallel_for_each( + Sections.begin(), Sections.end(), [&](MergeInputSection *Sec) { + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + ParallelBuilder->insert(Sec->Pieces[I], Sec->getData(I)); + }); + + ParallelBuilder->finalize(); + + this->Size = ParallelBuilder->size(); } template