Index: lld/ELF/InputSection.h =================================================================== --- lld/ELF/InputSection.h +++ lld/ELF/InputSection.h @@ -249,11 +249,12 @@ SyntheticSection *getParent() const; + std::vector Hashes; + private: void splitStrings(ArrayRef A, size_t Size); void splitNonStrings(ArrayRef A, size_t Size); - std::vector Hashes; mutable llvm::DenseMap OffsetMap; mutable llvm::once_flag InitOffsetMap; Index: lld/ELF/SyntheticSections.h =================================================================== --- lld/ELF/SyntheticSections.h +++ lld/ELF/SyntheticSections.h @@ -664,24 +664,26 @@ class MergeSyntheticSection : public SyntheticSection { public: void addSection(MergeInputSection *MS); - size_t getSize() const override; - void writeTo(uint8_t *Buf) override; protected: MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags, - uint32_t Alignment); + uint32_t Alignment) + : SyntheticSection(Flags, Type, Alignment, Name) {} std::vector Sections; - llvm::StringTableBuilder Builder; }; class MergeTailSection final : public MergeSyntheticSection { public: MergeTailSection(StringRef Name, uint32_t Type, uint64_t Flags, - uint32_t Alignment) - : MergeSyntheticSection(Name, Type, Flags, Alignment) {} + uint32_t Alignment); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; void finalizeContents() override; + +private: + llvm::StringTableBuilder Builder; }; class MergeNoTailSection final : public MergeSyntheticSection { @@ -690,7 +692,20 @@ uint32_t Alignment) : MergeSyntheticSection(Name, Type, Flags, Alignment) {} + size_t getSize() const override { return Size; } + void writeTo(uint8_t *Buf) override; void finalizeContents() override; + +private: + size_t Size; + + // Parallelism. Changing this number causes benign changes in the + // order of output section pieces. For build reproducibility, we + // always use the same number. + static constexpr size_t NumShards = 8; + + std::vector Shards; + size_t ShardOffsets[NumShards]; }; // .MIPS.abiflags section. Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -2175,19 +2175,19 @@ return getNeedNum() == 0; } -MergeSyntheticSection::MergeSyntheticSection(StringRef Name, uint32_t Type, - uint64_t Flags, uint32_t Alignment) - : SyntheticSection(Flags, Type, Alignment, Name), - Builder(StringTableBuilder::RAW, Alignment) {} - void MergeSyntheticSection::addSection(MergeInputSection *MS) { MS->Parent = this; Sections.push_back(MS); } -size_t MergeSyntheticSection::getSize() const { return Builder.getSize(); } +MergeTailSection::MergeTailSection(StringRef Name, uint32_t Type, + uint64_t Flags, uint32_t Alignment) + : MergeSyntheticSection(Name, Type, Flags, Alignment), + Builder(StringTableBuilder::RAW, Alignment) {} -void MergeSyntheticSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } +size_t MergeTailSection::getSize() const { return Builder.getSize(); } + +void MergeTailSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } void MergeTailSection::finalizeContents() { // Add all string pieces to the string table builder to create section @@ -2209,17 +2209,58 @@ Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); } +void MergeNoTailSection::writeTo(uint8_t *Buf) { + for (size_t I = 0; I < NumShards; ++I) + Shards[I].write(Buf + ShardOffsets[I]); +} + +// This function is very hot (i.e. can takes several seconds to finish) +// because sometimes the number of inputs is in an order of magnitude of +// millions. So, we use multi-threading. +// +// For any strings S and T, we know S is not mergeable with T if S's hash +// value is different from T's. If that's the case, we can safely put S and +// T into different string builders without worrying about merge misses. +// We do it in parallel. +// +// Even though skipping unwanted section pieces is much faster than putting +// them into hash tables, the cost of doing it is not free. Therefore, +// this function is slower than a single-threaded version if we have few +// number of cores. I think it's acceptable because it looks like dual core +// is a break-even for this function. void MergeNoTailSection::finalizeContents() { - // Add all string pieces to the string table builder to create section - // contents. Because we are not tail-optimizing, offsets of strings are - // fixed when they are added to the builder (string table builder contains - // a hash table from strings to offsets). - for (MergeInputSection *Sec : Sections) + // Initializes string table builders. + for (size_t I = 0; I < NumShards; ++I) + Shards.emplace_back(StringTableBuilder::RAW, Alignment); + + // Add section pieces to the builders. + parallelForEachN(0, NumShards, [&](size_t ShardId) { + for (MergeInputSection *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live && Sec->Hashes[I] % NumShards == ShardId) + Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I)); + }); + + for (size_t I = 0; I < NumShards; ++I) + Shards[I].finalizeInOrder(); + + // Compute an in-section offset for each shard. + size_t Off = 0; + for (size_t I = 0; I < NumShards; ++I) { + if (Shards[I].getSize() > 0) + Off = alignTo(Off, Alignment); + ShardOffsets[I] = Off; + Off += Shards[I].getSize(); + } + Size = Off; + + // So far, section pieces have offsets from beginning of shards, but we + // want offsets from beginning of the whole section. Fix them. + parallelForEach(Sections, [&](MergeInputSection *Sec) { for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I)); - - Builder.finalizeInOrder(); + Sec->Pieces[I].OutputOff += ShardOffsets[Sec->Hashes[I] % NumShards]; + }); } static MergeSyntheticSection *createMergeSynthetic(StringRef Name, Index: lld/test/ELF/comment-gc.s =================================================================== --- lld/test/ELF/comment-gc.s +++ lld/test/ELF/comment-gc.s @@ -5,8 +5,7 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .comment: -# CHECK-NEXT: 0000 00666f6f 00626172 004c4c44 20312e30 .foo.bar.LLD 1.0 -# CHECK-NEXT: 0010 00 . +# CHECK-NEXT: bar.LLD 1.0.foo. .ident "foo" Index: lld/test/ELF/compressed-debug-input.s =================================================================== --- lld/test/ELF/compressed-debug-input.s +++ lld/test/ELF/compressed-debug-input.s @@ -39,9 +39,6 @@ # GNU-NEXT: EntrySize: 1 # GNU-NEXT: } -# RUN: ld.lld %t -o %t.so -shared -# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=DATA %s - # RUN: ld.lld %t2 -o %t2.so -shared # RUN: llvm-readobj -sections -section-data %t2.so | FileCheck -check-prefix=DATA %s @@ -62,10 +59,10 @@ # DATA-NEXT: EntrySize: 0 # DATA-NEXT: SectionData ( # DATA-NEXT: 0000: 73686F72 7420756E 7369676E 65642069 |short unsigned i| -# DATA-NEXT: 0010: 6E740075 6E736967 6E656420 696E7400 |nt.unsigned int.| -# DATA-NEXT: 0020: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in| -# DATA-NEXT: 0030: 74006368 61720075 6E736967 6E656420 |t.char.unsigned | -# DATA-NEXT: 0040: 63686172 00 |char.| +# DATA-NEXT: 0010: 6E740063 68617200 756E7369 676E6564 |nt.char.unsigned| +# DATA-NEXT: 0020: 20636861 7200756E 7369676E 65642069 | char.unsigned i| +# DATA-NEXT: 0030: 6E74006C 6F6E6720 756E7369 676E6564 |nt.long unsigned| +# DATA-NEXT: 0040: 20696E74 00 | int.| # DATA-NEXT: ) # DATA-NEXT: } Index: lld/test/ELF/debug-gc.s =================================================================== --- lld/test/ELF/debug-gc.s +++ lld/test/ELF/debug-gc.s @@ -4,11 +4,11 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .debug_str: -# CHECK-NEXT: 0000 41414100 42424200 43434300 AAA.BBB.CCC. +# CHECK-NEXT: 0000 41414100 43434300 42424200 AAA.CCC.BBB. # CHECK: Contents of section .foo: # CHECK-NEXT: 0000 2a000000 # CHECK: Contents of section .debug_info: -# CHECK-NEXT: 0000 00000000 04000000 +# CHECK-NEXT: 0000 00000000 08000000 .globl _start _start: Index: lld/test/ELF/gc-sections-merge.s =================================================================== --- lld/test/ELF/gc-sections-merge.s +++ lld/test/ELF/gc-sections-merge.s @@ -20,7 +20,7 @@ // CHECK-NEXT: AddressAlignment: 1 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 666F6F00 62617200 |foo.bar.| +// CHECK-NEXT: 0000: 62617200 666F6F00 |bar.foo.| // CHECK-NEXT: ) // GC: Name: .rodata Index: lld/test/ELF/merge-string-align.s =================================================================== --- lld/test/ELF/merge-string-align.s +++ lld/test/ELF/merge-string-align.s @@ -30,8 +30,8 @@ // CHECK-NEXT: AddressAlignment: 16 // CHECK-NEXT: EntrySize: // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 666F6F00 00000000 00000000 00000000 |foo.............| -// CHECK-NEXT: 0010: 62617200 |bar.| +// CHECK-NEXT: 0000: 62617200 00000000 00000000 00000000 |bar.............| +// CHECK-NEXT: 0010: 666F6F00 |foo.| // CHECK-NEXT: ) .section .rodata2,"aMS",@progbits,1 Index: lld/test/ELF/merge.s =================================================================== --- lld/test/ELF/merge.s +++ lld/test/ELF/merge.s @@ -31,17 +31,17 @@ // CHECK-NEXT: AddressAlignment: 4 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 10000000 42000000 +// CHECK-NEXT: 0000: 42000000 10000000 // CHECK-NEXT: ) -// Address of the constant 0x10 = 0x200120 = 2097440 -// Address of the constant 0x42 = 0x200124 = 2097444 +// Address of the constant 0x42 = 0x200120 = 2097440 +// Address of the constant 0x10 = 0x200124 = 2097444 // CHECK: Symbols [ // CHECK: Name: bar -// CHECK-NEXT: Value: 0x200124 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Loca // CHECK-NEXT: Type: None @@ -49,7 +49,7 @@ // CHECK-NEXT: Section: .mysec // CHECK: Name: zed -// CHECK-NEXT: Value: 0x200124 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local // CHECK-NEXT: Type: None @@ -57,7 +57,7 @@ // CHECK-NEXT: Section: .mysec // CHECK: Name: foo -// CHECK-NEXT: Value: 0x200124 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local // CHECK-NEXT: Type: None @@ -75,37 +75,37 @@ // DISASM-NEXT: _start: movl .mysec, %eax -// addr(0x10) = 2097440 -// DISASM-NEXT: movl 2097440, %eax +// addr(0x10) = 2097444 +// DISASM-NEXT: movl 2097444, %eax movl .mysec+7, %eax -// addr(0x42) + 3 = 2097444 + 3 = 2097447 -// DISASM-NEXT: movl 2097447, %eax +// addr(0x42) + 3 = 2097440 + 3 +// DISASM-NEXT: movl 2097443, %eax movl .mysec+8, %eax -// addr(0x42) = 2097444 -// DISASM-NEXT: movl 2097444, %eax +// addr(0x42) = 2097440 +// DISASM-NEXT: movl 2097440, %eax movl bar+7, %eax -// addr(0x42) + 7 = 2097444 + 7 = 2097451 -// DISASM-NEXT: movl 2097451, %eax +// addr(0x42) + 7 = 2097440 + 7 +// DISASM-NEXT: movl 2097447, %eax movl bar+8, %eax -// addr(0x42) + 8 = 2097444 + 8 = 2097452 -// DISASM-NEXT: movl 2097452, %eax +// addr(0x42) + 8 = 2097440 + 8 +// DISASM-NEXT: movl 2097448, %eax movl foo, %eax -// addr(0x42) = 2097444 -// DISASM-NEXT: movl 2097444, %eax +// addr(0x42) = 2097440 +// DISASM-NEXT: movl 2097440, %eax movl foo+7, %eax -// addr(0x42) + 7 = = 2097444 + 7 = 2097451 -// DISASM-NEXT: movl 2097451, %eax +// addr(0x42) + 7 = = 2097440 + 7 +// DISASM-NEXT: movl 2097447, %eax movl foo+8, %eax -// addr(0x42) + 8 = = 2097444 + 8 = 2097452 -// DISASM-NEXT: movl 2097452, %eax +// addr(0x42) + 8 = = 2097440 + 8 +// DISASM-NEXT: movl 2097448, %eax // From the other file: movl .mysec, %eax -// addr(0x42) = 2097444 -// DISASM-NEXT: movl 2097444, %eax +// addr(0x42) = 2097440 +// DISASM-NEXT: movl 2097440, %eax