diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -236,7 +236,8 @@ uint32_t InputOff; uint32_t Live : 1; - uint32_t Hash : 31; + uint32_t ShardId : 5; + uint32_t Hash : 26; uint64_t OutputOff = 0; }; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -836,25 +836,34 @@ class MergeSyntheticSection : public SyntheticSection { public: void addSection(MergeInputSection *MS); + size_t getSize() const override { return Size; } + void writeTo(uint8_t *Buf) override; + std::vector Sections; protected: MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags, - uint32_t Alignment) - : SyntheticSection(Flags, Type, Alignment, Name) {} + uint32_t Alignment); + + // Section size + size_t Size; + + // Concurrency level. + size_t Concurrency; + + // String table contents + constexpr static size_t NumShards = 32; + std::vector Shards; + size_t ShardOffsets[NumShards]; }; class MergeTailSection final : public MergeSyntheticSection { public: MergeTailSection(StringRef Name, uint32_t Type, uint64_t Flags, - uint32_t Alignment); + uint32_t Alignment) + : MergeSyntheticSection(Name, Type, Flags, Alignment) {} - size_t getSize() const override; - void writeTo(uint8_t *Buf) override; void finalizeContents() override; - -private: - llvm::StringTableBuilder Builder; }; class MergeNoTailSection final : public MergeSyntheticSection { @@ -863,8 +872,6 @@ uint32_t Alignment) : MergeSyntheticSection(Name, Type, Flags, Alignment) {} - size_t getSize() const override { return Size; } - void writeTo(uint8_t *Buf) override; void finalizeContents() override; private: @@ -877,14 +884,6 @@ assert((Hash >> 31) == 0); return Hash >> (31 - llvm::countTrailingZeros(NumShards)); } - - // Section size - size_t Size; - - // String table contents - constexpr static size_t NumShards = 32; - std::vector Shards; - size_t ShardOffsets[NumShards]; }; // .MIPS.abiflags section. diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -52,7 +52,7 @@ using llvm::support::endian::write32le; using llvm::support::endian::write64le; -constexpr size_t MergeNoTailSection::NumShards; +constexpr size_t MergeSyntheticSection::NumShards; static uint64_t readUint(uint8_t *Buf) { return Config->Is64 ? read64(Buf) : read32(Buf); @@ -2916,41 +2916,28 @@ return SharedFile::VernauxNum != 0; } +MergeSyntheticSection::MergeSyntheticSection(StringRef Name, uint32_t Type, + uint64_t Flags, uint32_t Alignment) + : SyntheticSection(Flags, Type, Alignment, Name) { + // Initialize string builders. + for (size_t I = 0; I < NumShards; ++I) + Shards.emplace_back(StringTableBuilder::RAW, Alignment); + + // Initializes concurrency level. Must be a power of 2 to avoid + // the expensive modulo operation when dividing with this value. + if (ThreadsEnabled) + Concurrency = + std::min(PowerOf2Floor(hardware_concurrency()), NumShards); + else + Concurrency = 1; +} + void MergeSyntheticSection::addSection(MergeInputSection *MS) { MS->Parent = this; Sections.push_back(MS); } -MergeTailSection::MergeTailSection(StringRef Name, uint32_t Type, - uint64_t Flags, uint32_t Alignment) - : MergeSyntheticSection(Name, Type, Flags, Alignment), - Builder(StringTableBuilder::RAW, Alignment) {} - -size_t MergeTailSection::getSize() const { return Builder.getSize(); } - -void MergeTailSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } - -void MergeTailSection::finalizeContents() { - // Add all string pieces to the string table builder to create section - // contents. - for (MergeInputSection *Sec : Sections) - for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) - if (Sec->Pieces[I].Live) - Builder.add(Sec->getData(I)); - - // Fix the string table content. After this, the contents will never change. - Builder.finalize(); - - // finalize() fixed tail-optimized strings, so we can now get - // offsets of strings. Get an offset for each string and save it - // to a corresponding StringPiece for easy access. - for (MergeInputSection *Sec : Sections) - for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) - if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); -} - -void MergeNoTailSection::writeTo(uint8_t *Buf) { +void MergeSyntheticSection::writeTo(uint8_t *Buf) { for (size_t I = 0; I < NumShards; ++I) Shards[I].write(Buf + ShardOffsets[I]); } @@ -2964,17 +2951,6 @@ // T into different string builders without worrying about merge misses. // We do it in parallel. void MergeNoTailSection::finalizeContents() { - // Initializes string table builders. - for (size_t I = 0; I < NumShards; ++I) - Shards.emplace_back(StringTableBuilder::RAW, Alignment); - - // Concurrency level. Must be a power of 2 to avoid expensive modulo - // operations in the following tight loop. - size_t Concurrency = 1; - if (ThreadsEnabled) - Concurrency = - std::min(PowerOf2Floor(hardware_concurrency()), NumShards); - // Add section pieces to the builders. parallelForEachN(0, Concurrency, [&](size_t ThreadId) { for (MergeInputSection *Sec : Sections) { @@ -3009,6 +2985,65 @@ }); } +// This function basically does the same thing as MergeNoTailSection +// does but creates a tail-merged string table. E.g. "def\0" is merged +// with "abcdef\0". This is enabled only when -O2 is given. +// +// Input strings are sharded based on their very last few characters. +// This works because, if strings S and T can be tail-merged, S and T +// must have the same tail substring. Currently, we use the last 4 +// characters for sharding. +// +// Note that strings shorter than 4 characters may not be tail-merged. +// That is okay because the loss seems negligible. My observation is +// that a loss is 8 KiB when we create a 1.7 GiB clang executable. +void MergeTailSection::finalizeContents() { + // Initializes TailHashes by hash values of last few characters. + parallelForEach(Sections, [&](MergeInputSection *Sec) { + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].ShardId = + hash_value(Sec->getData(I).val().take_back(4)); + }); + + // Add section pieces to the builders. + parallelForEachN(0, Concurrency, [&](size_t ThreadId) { + for (MergeInputSection *Sec : Sections) { + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { + if (!Sec->Pieces[I].Live) + continue; + size_t ShardId = Sec->Pieces[I].ShardId; + assert(ShardId < (1UL << NumShards)); + if ((ShardId & (Concurrency - 1)) == ThreadId) + Shards[ShardId].add(Sec->getData(I)); + } + } + }); + + parallelForEach(Shards, [](StringTableBuilder &S) { S.finalize(); }); + + // Compute an in-section offset for each shard. + size_t Off = 0; + for (size_t I = 0; I < NumShards; ++I) { + if (Shards[I].getSize() > 0) + Off = alignTo(Off, Alignment); + ShardOffsets[I] = Off; + Off += Shards[I].getSize(); + } + Size = Off; + + // Set to section piece offsets. + parallelForEach(Sections, [&](MergeInputSection *Sec) { + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { + if (!Sec->Pieces[I].Live) + continue; + size_t ShardId = Sec->Pieces[I].ShardId; + Sec->Pieces[I].OutputOff = + ShardOffsets[ShardId] + Shards[ShardId].getOffset(Sec->getData(I)); + } + }); +} + static MergeSyntheticSection *createMergeSynthetic(StringRef Name, uint32_t Type, uint64_t Flags, diff --git a/lld/test/ELF/comment-gc.s b/lld/test/ELF/comment-gc.s --- a/lld/test/ELF/comment-gc.s +++ b/lld/test/ELF/comment-gc.s @@ -5,7 +5,7 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .comment: -# CHECK-NEXT: foo..LLD 1.0.bar +# CHECK-NEXT: foo.bar.LLD 1.0 .ident "foo" diff --git a/lld/test/ELF/compressed-debug-input.s b/lld/test/ELF/compressed-debug-input.s --- a/lld/test/ELF/compressed-debug-input.s +++ b/lld/test/ELF/compressed-debug-input.s @@ -61,11 +61,11 @@ # DATA-NEXT: AddressAlignment: 1 # DATA-NEXT: EntrySize: 1 # DATA-NEXT: SectionData ( -# DATA-NEXT: 0000: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in| -# DATA-NEXT: 0010: 7400756E 7369676E 65642063 68617200 |t.unsigned char.| -# DATA-NEXT: 0020: 756E7369 676E6564 20696E74 00636861 |unsigned int.cha| -# DATA-NEXT: 0030: 72007368 6F727420 756E7369 676E6564 |r.short unsigned| -# DATA-NEXT: 0040: 20696E74 00 | int.| +# DATA-NEXT: 0000: 73686F72 7420756E 7369676E 65642069 |short unsigned i| +# DATA-NEXT: 0010: 6E740075 6E736967 6E656420 696E7400 |nt.unsigned int.| +# DATA-NEXT: 0020: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in| +# DATA-NEXT: 0030: 74006368 61720075 6E736967 6E656420 |t.char.unsigned | +# DATA-NEXT: 0040: 63686172 00 |char.| # DATA-NEXT: ) # DATA-NEXT: } diff --git a/lld/test/ELF/debug-gc.s b/lld/test/ELF/debug-gc.s --- a/lld/test/ELF/debug-gc.s +++ b/lld/test/ELF/debug-gc.s @@ -4,11 +4,11 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .debug_str: -# CHECK-NEXT: 0000 41414100 43434300 42424200 AAA.CCC.BBB. +# CHECK-NEXT: 0000 41414100 42424200 43434300 AAA.BBB.CCC. # CHECK: Contents of section .foo: # CHECK-NEXT: 0000 2a000000 # CHECK: Contents of section .debug_info: -# CHECK-NEXT: 0000 00000000 08000000 +# CHECK-NEXT: 0000 00000000 04000000 .globl _start _start: diff --git a/lld/test/ELF/emit-relocs-mergeable-i386.s b/lld/test/ELF/emit-relocs-mergeable-i386.s --- a/lld/test/ELF/emit-relocs-mergeable-i386.s +++ b/lld/test/ELF/emit-relocs-mergeable-i386.s @@ -22,7 +22,7 @@ # CHECK-NEXT: AddressAlignment: # CHECK-NEXT: EntrySize: # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 00000000 08000000 08000000 04000000 +# CHECK-NEXT: 0000: 00000000 04000000 04000000 08000000 # CHECK-NEXT: ) # CHECK-NEXT: } @@ -43,7 +43,7 @@ # CHECK-NEXT: AddressAlignment: # CHECK-NEXT: EntrySize: # CHECK-NEXT: SectionData ( -# CHECK-NEXT: |AAA.CCC.BBB.| +# CHECK-NEXT: |AAA.BBB.CCC.| # CHECK-NEXT: ) # CHECK-NEXT: } diff --git a/lld/test/ELF/emit-relocs-mergeable.s b/lld/test/ELF/emit-relocs-mergeable.s --- a/lld/test/ELF/emit-relocs-mergeable.s +++ b/lld/test/ELF/emit-relocs-mergeable.s @@ -21,16 +21,16 @@ # CHECK-NEXT: AddressAlignment: # CHECK-NEXT: EntrySize: # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 41414100 43434300 42424200 |AAA.CCC.BBB.| +# CHECK-NEXT: 0000: 41414100 42424200 43434300 |AAA.BBB.CCC.| # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK: Relocations [ # CHECK-NEXT: Section {{.*}} .rela.foo { # CHECK-NEXT: 0x201000 R_X86_64_64 .strings 0x0 -# CHECK-NEXT: 0x201008 R_X86_64_64 .strings 0x8 -# CHECK-NEXT: 0x201010 R_X86_64_64 .strings 0x8 -# CHECK-NEXT: 0x201018 R_X86_64_64 .strings 0x4 +# CHECK-NEXT: 0x201008 R_X86_64_64 .strings 0x4 +# CHECK-NEXT: 0x201010 R_X86_64_64 .strings 0x4 +# CHECK-NEXT: 0x201018 R_X86_64_64 .strings 0x8 # CHECK-NEXT: } # CHECK-NEXT: ] diff --git a/lld/test/ELF/merge-shared-str.s b/lld/test/ELF/merge-shared-str.s --- a/lld/test/ELF/merge-shared-str.s +++ b/lld/test/ELF/merge-shared-str.s @@ -1,16 +1,14 @@ // REQUIRES: x86 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -O3 -// RUN: llvm-readobj -r -S %t.so | FileCheck %s +// RUN: ld.lld %t.o -o %t.so -shared -O2 +// RUN: llvm-readobj -r -s %t.so | FileCheck %s +.section foo,"aMS",@progbits,1 +.asciz "foobarbaz" +.asciz "barbaz" - .section foo,"aMS",@progbits,1 - .asciz "bar" - .asciz "ar" - - .data - .quad foo + 4 - +.data +.quad foo + 4 // CHECK: Name: foo // CHECK-NEXT: Type: SHT_PROGBITS @@ -23,6 +21,6 @@ // CHECK: Relocations [ // CHECK-NEXT: Section ({{.*}}) .rela.dyn { -// CHECK-NEXT: 0x{{.*}} R_X86_64_RELATIVE - 0x261 +// CHECK-NEXT: 0x{{.*}} R_X86_64_RELATIVE - 0x264 // CHECK-NEXT: } // CHECK-NEXT: ] diff --git a/lld/test/ELF/merge-string.s b/lld/test/ELF/merge-string.s --- a/lld/test/ELF/merge-string.s +++ b/lld/test/ELF/merge-string.s @@ -8,12 +8,12 @@ // RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck --check-prefix=NOMERGE %s .section .rodata1,"aMS",@progbits,1 - .asciz "abc" + .asciz "abcdef" foo: - .ascii "a" + .ascii "abcd" bar: - .asciz "bc" - .asciz "bc" + .asciz "ef" + .asciz "cdef" .section .rodata2,"aMS",@progbits,2 .align 2 @@ -30,13 +30,13 @@ // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x20D // CHECK-NEXT: Offset: 0x20D -// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Size: // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 // CHECK-NEXT: AddressAlignment: 1 // CHECK-NEXT: EntrySize: 1 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 61626300 |abc.| +// CHECK-NEXT: |abcdef.| // CHECK-NEXT: ) // NOTAIL: Name: .rodata1 @@ -48,13 +48,13 @@ // NOTAIL-NEXT: ] // NOTAIL-NEXT: Address: 0x20D // NOTAIL-NEXT: Offset: 0x20D -// NOTAIL-NEXT: Size: 7 +// NOTAIL-NEXT: Size: // NOTAIL-NEXT: Link: 0 // NOTAIL-NEXT: Info: 0 // NOTAIL-NEXT: AddressAlignment: 1 // NOTAIL-NEXT: EntrySize: 1 // NOTAIL-NEXT: SectionData ( -// NOTAIL-NEXT: 0000: 62630061 626300 |bc.abc.| +// NOTAIL-NEXT: |abcdef.cdef.| // NOTAIL-NEXT: ) // NOMERGE: Name: .rodata1 @@ -66,13 +66,14 @@ // NOMERGE-NEXT: ] // NOMERGE-NEXT: Address: 0x20D // NOMERGE-NEXT: Offset: 0x20D -// NOMERGE-NEXT: Size: 11 +// NOMERGE-NEXT: Size: // NOMERGE-NEXT: Link: 0 // NOMERGE-NEXT: Info: 0 // NOMERGE-NEXT: AddressAlignment: 1 // NOMERGE-NEXT: EntrySize: 1 // NOMERGE-NEXT: SectionData ( -// NOMERGE-NEXT: 0000: 61626300 61626300 626300 |abc.abc.bc.| +// NOMERGE-NEXT: |abcdef.abcdef.cd| +// NOMERGE-NEXT: |ef.| // NOMERGE-NEXT: ) // CHECK: Name: .rodata2 @@ -82,24 +83,24 @@ // CHECK-NEXT: SHF_MERGE // CHECK-NEXT: SHF_STRINGS // CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x212 -// CHECK-NEXT: Offset: 0x212 -// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Address: 0x214 +// CHECK-NEXT: Offset: 0x214 +// CHECK-NEXT: Size: // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 // CHECK-NEXT: AddressAlignment: 2 // CHECK-NEXT: EntrySize: 2 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 |....| +// CHECK-NEXT: |....| // CHECK-NEXT: ) // CHECK: Name: bar -// CHECK-NEXT: Value: 0x20E +// CHECK-NEXT: Value: 0x211 // CHECK: Name: foo // CHECK-NEXT: Value: 0x20D // CHECK: Name: zed -// CHECK-NEXT: Value: 0x212 +// CHECK-NEXT: Value: 0x214 // CHECK-NEXT: Size: 0 diff --git a/lld/test/ELF/relocatable-compressed-input.s b/lld/test/ELF/relocatable-compressed-input.s --- a/lld/test/ELF/relocatable-compressed-input.s +++ b/lld/test/ELF/relocatable-compressed-input.s @@ -24,11 +24,11 @@ # CHECK-NEXT: AddressAlignment: 1 # CHECK-NEXT: EntrySize: 1 # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: {{.*}} |long unsigned in| -# CHECK-NEXT: 0010: {{.*}} |t.unsigned char.| -# CHECK-NEXT: 0020: {{.*}} |unsigned int.cha| -# CHECK-NEXT: 0030: {{.*}} |r.short unsigned| -# CHECK-NEXT: 0040: {{.*}} | int.| +# CHECK-NEXT: 0000: {{.*}} |short unsigned i| +# CHECK-NEXT: 0010: {{.*}} |nt.unsigned int.| +# CHECK-NEXT: 0020: {{.*}} |long unsigned in| +# CHECK-NEXT: 0030: {{.*}} |t.char.unsigned | +# CHECK-NEXT: 0040: {{.*}} |char.| # CHECK-NEXT: ) # CHECK-NEXT: } diff --git a/lld/test/ELF/string-gc.s b/lld/test/ELF/string-gc.s --- a/lld/test/ELF/string-gc.s +++ b/lld/test/ELF/string-gc.s @@ -15,7 +15,7 @@ // CHECK-NEXT: } // CHECK-NEXT: Symbol { // CHECK-NEXT: Name: s3 -// CHECK-NEXT: Value: 0x200120 +// CHECK-NEXT: Value: 0x200125 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Object (0x1) @@ -24,7 +24,7 @@ // CHECK-NEXT: } // CHECK-NEXT: Symbol { // CHECK-NEXT: Name: s1 -// CHECK-NEXT: Value: 0x200125 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Object (0x1) diff --git a/lld/test/ELF/tail-merge-string-align.s b/lld/test/ELF/tail-merge-string-align.s --- a/lld/test/ELF/tail-merge-string-align.s +++ b/lld/test/ELF/tail-merge-string-align.s @@ -1,20 +1,19 @@ // REQUIRES: x86 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -O3 -// RUN: llvm-readobj -S --section-data %t.so | FileCheck %s +// RUN: ld.lld %t.o -o %t.so -shared -O2 +// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s - .section .rodata.4a,"aMS",@progbits,1 - .align 4 - .asciz "abcdef" +.section .rodata.4a,"aMS",@progbits,1 +.align 4 +.asciz "abcdefg" - .section .rodata.4b,"aMS",@progbits,1 - .align 4 - .asciz "ef" - - .section .rodata.4c,"aMS",@progbits,1 - .align 4 - .asciz "f" +.section .rodata.4b,"aMS",@progbits,1 +.align 4 +.asciz "efg" +.section .rodata.4c,"aMS",@progbits,1 +.align 4 +.asciz "defgh" // CHECK: Name: .rodata // CHECK-NEXT: Type: SHT_PROGBITS @@ -25,11 +24,11 @@ // CHECK-NEXT: ] // CHECK-NEXT: Address: // CHECK-NEXT: Offset: -// CHECK-NEXT: Size: 1 +// CHECK-NEXT: Size: // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 // CHECK-NEXT: AddressAlignment: 4 // CHECK-NEXT: EntrySize: // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 61626364 65660000 6600 |abcdef..f.| +// CHECK-NEXT: |defgh...abcdefg.| // CHECK-NEXT: )