diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -299,6 +299,7 @@
   SyntheticSection *getParent() const;
 
 private:
+  void splitStrings1(ArrayRef<uint8_t> a);
   void splitStrings(ArrayRef<uint8_t> a, size_t size);
   void splitNonStrings(ArrayRef<uint8_t> a, size_t size);
 };
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1360,6 +1360,55 @@
   return cast_or_null<SyntheticSection>(parent);
 }
 
+// Compute the size (including the NUL terminator) and a 32-bit hash of the
+// NUL-terminated string that starts at the beginning of `s`.
+//
+// `s` must contain a NUL byte: the byte-wise tail loop below stops only at a
+// NUL and never compares against the end of `s`. splitStrings1 guarantees
+// this by rejecting sections whose last byte is not NUL.
+static inline std::pair<size_t, uint32_t> findSizeAndHash(StringRef s) {
+  assert(!s.empty() && s.back() == '\0' && "string is not null terminated");
+  const char *p = s.data(), *end = p + s.size();
+  uint64_t h = 5381;
+  for (; end - p >= 8; p += 8) {
+    const uint64_t word = read64le(p);
+    // Break if any byte is NUL.
+    if ((word - 0x0101010101010101) & ~word & 0x8080808080808080)
+      break;
+    // Similar to djb2 but applies to a 64-bit word.
+    h = h * 33 + word;
+  }
+
+  // Hash all bytes before the NUL byte in the original djb2 way. Widen each
+  // byte as unsigned: plain `char` is signed on some hosts and unsigned on
+  // others, so sign-extending bytes >= 0x80 would make the hash depend on
+  // the host the linker runs on.
+  for (; *p; p++)
+    h = h * 33 + uint8_t(*p);
+  // Fold the 64-bit state into the 32-bit hash.
+  return {p - s.data() + 1, uint32_t((h >> 32) ^ h)};
+}
+
+// Split a SHF_STRINGS section whose entsize is 1. Each piece covers one
+// NUL-terminated string and records its section offset, its hash and whether
+// it is live.
+void MergeInputSection::splitStrings1(ArrayRef<uint8_t> a) {
+  if (a.empty() || a.back() != 0)
+    fatal(toString(this) + ": string is not null terminated");
+
+  size_t off = 0;
+  const bool live = !(flags & SHF_ALLOC) || !config->gcSections;
+  StringRef s = toStringRef(a);
+  while (!s.empty()) {
+    size_t size;
+    uint32_t hash;
+    std::tie(size, hash) = findSizeAndHash(s);
+    pieces.emplace_back(off, hash, live);
+    s = s.substr(size);
+    off += size;
+  }
+}
+
 // Split SHF_STRINGS section. Such section is a sequence of
 // null-terminated strings.
 void MergeInputSection::splitStrings(ArrayRef<uint8_t> data, size_t entSize) {
@@ -1413,10 +1462,12 @@
 void MergeInputSection::splitIntoPieces() {
   assert(pieces.empty());
 
-  if (flags & SHF_STRINGS)
-    splitStrings(data(), entsize);
-  else
+  if (!(flags & SHF_STRINGS))
     splitNonStrings(data(), entsize);
+  else if (entsize == 1)
+    splitStrings1(data());
+  else
+    splitStrings(data(), entsize);
 }
 
 SectionPiece *MergeInputSection::getSectionPiece(uint64_t offset) {
diff --git a/lld/test/ELF/comment-gc.s b/lld/test/ELF/comment-gc.s
--- a/lld/test/ELF/comment-gc.s
+++ b/lld/test/ELF/comment-gc.s
@@ -5,7 +5,7 @@
 # RUN: llvm-objdump -s %t1 | FileCheck %s
 
 # CHECK: Contents of section .comment:
-# CHECK-NEXT: foo..LLD 1.0.bar
+# CHECK-NEXT: .foo.bar.LLD 1.0{{$}}
 
 .ident "foo"
 
diff --git a/lld/test/ELF/debug-gc.s b/lld/test/ELF/debug-gc.s
--- a/lld/test/ELF/debug-gc.s
+++ b/lld/test/ELF/debug-gc.s
@@ -4,11 +4,11 @@
 # RUN: llvm-objdump -s %t1 | FileCheck %s
 
 # CHECK: Contents of section .debug_str:
-# CHECK-NEXT: 0000 41414100 43434300 42424200 AAA.CCC.BBB.
+# CHECK-NEXT: 0000 41414100 42424200 43434300 AAA.BBB.CCC.
 # CHECK: Contents of section .foo:
 # CHECK-NEXT: 0000 2a000000
 # CHECK: Contents of section .debug_info:
-# CHECK-NEXT: 0000 00000000 08000000
+# CHECK-NEXT: 0000 00000000 04000000 ........
 
 .globl _start
 _start:
diff --git a/lld/test/ELF/gc-sections-string.s b/lld/test/ELF/gc-sections-string.s
--- a/lld/test/ELF/gc-sections-string.s
+++ b/lld/test/ELF/gc-sections-string.s
@@ -15,19 +15,19 @@
 // CHECK-NEXT: }
 // CHECK-NEXT: Symbol {
 // CHECK-NEXT: Name: s3
-// CHECK-NEXT: Value: 0x200120
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local (0x0)
-// CHECK-NEXT: Type: Object (0x1)
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .rodata (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: s1
 // CHECK-NEXT: Value: 0x200125
 // CHECK-NEXT: Size: 0
 // CHECK-NEXT: Binding: Local (0x0)
 // CHECK-NEXT: Type: Object (0x1)
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .rodata (0x1)
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: s1
+// CHECK-NEXT: Value: 0x200120
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local (0x0)
+// CHECK-NEXT: Type: Object (0x1)
 // CHECK-NEXT: Other [ (0x2)
 // CHECK-NEXT: STV_HIDDEN (0x2)
 // CHECK-NEXT: ]