Index: lld/ELF/InputSection.h =================================================================== --- lld/ELF/InputSection.h +++ lld/ELF/InputSection.h @@ -240,7 +240,7 @@ End = Pieces[I + 1].InputOff; StringRef S = {(const char *)(this->Data.data() + Begin), End - Begin}; - return {S, Hashes[I]}; + return {S, (uint32_t)Hashes[I]}; } // Returns the SectionPiece at a given input section offset. @@ -249,11 +249,12 @@ SyntheticSection *getParent() const; + std::vector Hashes; + private: void splitStrings(ArrayRef A, size_t Size); void splitNonStrings(ArrayRef A, size_t Size); - std::vector Hashes; mutable llvm::DenseMap OffsetMap; mutable llvm::once_flag InitOffsetMap; Index: lld/ELF/SyntheticSections.h =================================================================== --- lld/ELF/SyntheticSections.h +++ lld/ELF/SyntheticSections.h @@ -668,24 +668,26 @@ class MergeSyntheticSection : public SyntheticSection { public: void addSection(MergeInputSection *MS); - size_t getSize() const override; - void writeTo(uint8_t *Buf) override; protected: MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags, - uint32_t Alignment); + uint32_t Alignment) + : SyntheticSection(Flags, Type, Alignment, Name) {} std::vector Sections; - llvm::StringTableBuilder Builder; }; class MergeTailSection final : public MergeSyntheticSection { public: MergeTailSection(StringRef Name, uint32_t Type, uint64_t Flags, - uint32_t Alignment) - : MergeSyntheticSection(Name, Type, Flags, Alignment) {} + uint32_t Alignment); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; void finalizeContents() override; + +private: + llvm::StringTableBuilder Builder; }; class MergeNoTailSection final : public MergeSyntheticSection { @@ -694,7 +696,27 @@ uint32_t Alignment) : MergeSyntheticSection(Name, Type, Flags, Alignment) {} + size_t getSize() const override { return Size; } + void writeTo(uint8_t *Buf) override; void finalizeContents() override; + +private: + // We use the most significant bits of a hash as a shard ID. + // The reason why we don't want to use the least significant bits is + // because DenseMap also uses lower bits to determine a bucket ID. + // If we use lower bits, it significantly increases the probability of + // hash collisons. + size_t getShardId(size_t Hash) { + return Hash >> (sizeof(size_t) * 8 - llvm::countTrailingZeros(NumShards)); + } + + // Section size + size_t Size; + + // String table contents + constexpr static size_t NumShards = 32; + std::vector Shards; + size_t ShardOffsets[NumShards]; }; // .MIPS.abiflags section. Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/SHA1.h" #include "llvm/Support/xxhash.h" #include +#include using namespace llvm; using namespace llvm::dwarf; @@ -48,6 +49,8 @@ using namespace lld; using namespace lld::elf; +const size_t MergeNoTailSection::NumShards; + uint64_t SyntheticSection::getVA() const { if (OutputSection *Sec = getParent()) return Sec->Addr + OutSecOff; @@ -2177,19 +2180,19 @@ return getNeedNum() == 0; } -MergeSyntheticSection::MergeSyntheticSection(StringRef Name, uint32_t Type, - uint64_t Flags, uint32_t Alignment) - : SyntheticSection(Flags, Type, Alignment, Name), - Builder(StringTableBuilder::RAW, Alignment) {} - void MergeSyntheticSection::addSection(MergeInputSection *MS) { MS->Parent = this; Sections.push_back(MS); } -size_t MergeSyntheticSection::getSize() const { return Builder.getSize(); } +MergeTailSection::MergeTailSection(StringRef Name, uint32_t Type, + uint64_t Flags, uint32_t Alignment) + : MergeSyntheticSection(Name, Type, Flags, Alignment), + Builder(StringTableBuilder::RAW, Alignment) {} + +size_t MergeTailSection::getSize() const { return Builder.getSize(); } -void MergeSyntheticSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } +void MergeTailSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } void MergeTailSection::finalizeContents() { // Add all string pieces to the string table builder to create section @@ -2211,17 +2214,61 @@ Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); } +void MergeNoTailSection::writeTo(uint8_t *Buf) { + for (size_t I = 0; I < NumShards; ++I) + Shards[I].write(Buf + ShardOffsets[I]); +} + +// This function is very hot (i.e. it can take several seconds to finish) +// because sometimes the number of inputs is in an order of magnitude of +// millions. So, we use multi-threading. +// +// For any strings S and T, we know S is not mergeable with T if S's hash +// value is different from T's. If that's the case, we can safely put S and +// T into different string builders without worrying about merge misses. +// We do it in parallel. void MergeNoTailSection::finalizeContents() { - // Add all string pieces to the string table builder to create section - // contents. Because we are not tail-optimizing, offsets of strings are - // fixed when they are added to the builder (string table builder contains - // a hash table from strings to offsets). - for (MergeInputSection *Sec : Sections) + // Initializes string table builders. + for (size_t I = 0; I < NumShards; ++I) + Shards.emplace_back(StringTableBuilder::RAW, Alignment); + + // Concurrency level. Must be a power of 2. + size_t Parallelism = + std::min(PowerOf2Floor(std::thread::hardware_concurrency()), NumShards); + + // Add section pieces to the builders. + parallelForEachN(0, Parallelism, [&](size_t ThreadId) { + for (MergeInputSection *Sec : Sections) { + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { + if (!Sec->Pieces[I].Live) + continue; + size_t ShardId = getShardId(Sec->Hashes[I]); + if ((ShardId & (Parallelism - 1)) == ThreadId) + Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I)); + } + } + }); + + for (size_t I = 0; I < NumShards; ++I) + Shards[I].finalizeInOrder(); + + // Compute an in-section offset for each shard. + size_t Off = 0; + for (size_t I = 0; I < NumShards; ++I) { + if (Shards[I].getSize() > 0) + Off = alignTo(Off, Alignment); + ShardOffsets[I] = Off; + Off += Shards[I].getSize(); + } + Size = Off; + + // So far, section pieces have offsets from beginning of shards, but we + // want offsets from beginning of the whole section. Fix them. + parallelForEach(Sections, [&](MergeInputSection *Sec) { for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I)); - - Builder.finalizeInOrder(); + Sec->Pieces[I].OutputOff += ShardOffsets[getShardId(Sec->Hashes[I])]; + }); } static MergeSyntheticSection *createMergeSynthetic(StringRef Name, Index: lld/test/ELF/comment-gc.s =================================================================== --- lld/test/ELF/comment-gc.s +++ lld/test/ELF/comment-gc.s @@ -5,8 +5,7 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .comment: -# CHECK-NEXT: 0000 00666f6f 00626172 004c4c44 20312e30 .foo.bar.LLD 1.0 -# CHECK-NEXT: 0010 00 . +# CHECK-NEXT: .foo.LLD 1.0.bar .ident "foo" Index: lld/test/ELF/compress-debug-sections.s =================================================================== --- lld/test/ELF/compress-debug-sections.s +++ lld/test/ELF/compress-debug-sections.s @@ -18,8 +18,8 @@ # RUN: llvm-dwarfdump %t1 -debug-str | \ # RUN: FileCheck %s --check-prefix=DEBUGSTR # DEBUGSTR: .debug_str contents: -# DEBUGSTR-NEXT: AAAAAAAAAAAAAAAAAAAAAAAAAAA # DEBUGSTR-NEXT: BBBBBBBBBBBBBBBBBBBBBBBBBBB +# DEBUGSTR-NEXT: AAAAAAAAAAAAAAAAAAAAAAAAAAA ## Test alias. # RUN: ld.lld %t.o -o %t2 --compress-debug-sections zlib Index: lld/test/ELF/compressed-debug-input.s =================================================================== --- lld/test/ELF/compressed-debug-input.s +++ lld/test/ELF/compressed-debug-input.s @@ -39,9 +39,6 @@ # GNU-NEXT: EntrySize: 1 # GNU-NEXT: } -# RUN: ld.lld %t -o %t.so -shared -# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=DATA %s - # RUN: ld.lld %t2 -o %t2.so -shared # RUN: llvm-readobj -sections -section-data %t2.so | FileCheck -check-prefix=DATA %s @@ -61,11 +58,11 @@ # DATA-NEXT: AddressAlignment: 1 # DATA-NEXT: EntrySize: 0 # DATA-NEXT: SectionData ( -# DATA-NEXT: 0000: 73686F72 7420756E 7369676E 65642069 |short unsigned i| -# DATA-NEXT: 0010: 6E740075 6E736967 6E656420 696E7400 |nt.unsigned int.| -# DATA-NEXT: 0020: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in| -# DATA-NEXT: 0030: 74006368 61720075 6E736967 6E656420 |t.char.unsigned | -# DATA-NEXT: 0040: 63686172 00 |char.| +# DATA-NEXT: 0000: 756E7369 676E6564 20636861 72006368 |unsigned char.ch| +# DATA-NEXT: 0010: 61720073 686F7274 20756E73 69676E65 |ar.short unsigne| +# DATA-NEXT: 0020: 6420696E 74006C6F 6E672075 6E736967 |d int.long unsig| +# DATA-NEXT: 0030: 6E656420 696E7400 756E7369 676E6564 |ned int.unsigned| +# DATA-NEXT: 0040: 20696E74 00 | int.| # DATA-NEXT: ) # DATA-NEXT: } Index: lld/test/ELF/debug-gc.s =================================================================== --- lld/test/ELF/debug-gc.s +++ lld/test/ELF/debug-gc.s @@ -4,11 +4,11 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .debug_str: -# CHECK-NEXT: 0000 41414100 42424200 43434300 AAA.BBB.CCC. +# CHECK-NEXT: 0000 42424200 43434300 41414100 BBB.CCC.AAA. # CHECK: Contents of section .foo: # CHECK-NEXT: 0000 2a000000 # CHECK: Contents of section .debug_info: -# CHECK-NEXT: 0000 00000000 04000000 +# CHECK-NEXT: 0000 08000000 00000000 .globl _start _start: Index: lld/test/ELF/merge-string.s =================================================================== --- lld/test/ELF/merge-string.s +++ lld/test/ELF/merge-string.s @@ -54,7 +54,7 @@ // NOTAIL-NEXT: AddressAlignment: 1 // NOTAIL-NEXT: EntrySize: 0 // NOTAIL-NEXT: SectionData ( -// NOTAIL-NEXT: 0000: 61626300 626300 |abc.bc.| +// NOTAIL-NEXT: 0000: 62630061 626300 |bc.abc.| // NOTAIL-NEXT: ) // NOMERGE: Name: .rodata1 Index: lld/test/ELF/merge.s =================================================================== --- lld/test/ELF/merge.s +++ lld/test/ELF/merge.s @@ -31,17 +31,17 @@ // CHECK-NEXT: AddressAlignment: 4 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 10000000 42000000 +// CHECK-NEXT: 0000: 42000000 10000000 // CHECK-NEXT: ) -// Address of the constant 0x10 = 0x200120 = 2097440 -// Address of the constant 0x42 = 0x200124 = 2097444 +// Address of the constant 0x42 = 0x200120 = 2097440 +// Address of the constant 0x10 = 0x200124 = 2097444 // CHECK: Symbols [ // CHECK: Name: bar -// CHECK-NEXT: Value: 0x200124 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Loca // CHECK-NEXT: Type: None @@ -49,7 +49,7 @@ // CHECK-NEXT: Section: .mysec // CHECK: Name: zed -// CHECK-NEXT: Value: 0x200124 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local // CHECK-NEXT: Type: None @@ -57,7 +57,7 @@ // CHECK-NEXT: Section: .mysec // CHECK: Name: foo -// CHECK-NEXT: Value: 0x200124 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local // CHECK-NEXT: Type: None @@ -75,37 +75,37 @@ // DISASM-NEXT: _start: movl .mysec, %eax -// addr(0x10) = 2097440 -// DISASM-NEXT: movl 2097440, %eax +// addr(0x10) = 2097444 +// DISASM-NEXT: movl 2097444, %eax movl .mysec+7, %eax -// addr(0x42) + 3 = 2097444 + 3 = 2097447 -// DISASM-NEXT: movl 2097447, %eax +// addr(0x42) + 3 = 2097440 + 3 +// DISASM-NEXT: movl 2097443, %eax movl .mysec+8, %eax -// addr(0x42) = 2097444 -// DISASM-NEXT: movl 2097444, %eax +// addr(0x42) = 2097440 +// DISASM-NEXT: movl 2097440, %eax movl bar+7, %eax -// addr(0x42) + 7 = 2097444 + 7 = 2097451 -// DISASM-NEXT: movl 2097451, %eax +// addr(0x42) + 7 = 2097440 + 7 +// DISASM-NEXT: movl 2097447, %eax movl bar+8, %eax -// addr(0x42) + 8 = 2097444 + 8 = 2097452 -// DISASM-NEXT: movl 2097452, %eax +// addr(0x42) + 8 = 2097440 + 8 +// DISASM-NEXT: movl 2097448, %eax movl foo, %eax -// addr(0x42) = 2097444 -// DISASM-NEXT: movl 2097444, %eax +// addr(0x42) = 2097440 +// DISASM-NEXT: movl 2097440, %eax movl foo+7, %eax -// addr(0x42) + 7 = = 2097444 + 7 = 2097451 -// DISASM-NEXT: movl 2097451, %eax +// addr(0x42) + 7 = = 2097440 + 7 +// DISASM-NEXT: movl 2097447, %eax movl foo+8, %eax -// addr(0x42) + 8 = = 2097444 + 8 = 2097452 -// DISASM-NEXT: movl 2097452, %eax +// addr(0x42) + 8 = = 2097440 + 8 +// DISASM-NEXT: movl 2097448, %eax // From the other file: movl .mysec, %eax -// addr(0x42) = 2097444 -// DISASM-NEXT: movl 2097444, %eax +// addr(0x42) = 2097440 +// DISASM-NEXT: movl 2097440, %eax Index: lld/test/ELF/string-gc.s =================================================================== --- lld/test/ELF/string-gc.s +++ lld/test/ELF/string-gc.s @@ -14,7 +14,7 @@ // CHECK-NEXT: } // CHECK-NEXT: Symbol { // CHECK-NEXT: Name: s3 -// CHECK-NEXT: Value: 0x200125 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Object (0x1) @@ -23,7 +23,7 @@ // CHECK-NEXT: } // CHECK-NEXT: Symbol { // CHECK-NEXT: Name: s1 -// CHECK-NEXT: Value: 0x200120 +// CHECK-NEXT: Value: 0x200125 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Object (0x1)