Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -849,10 +849,6 @@ } static size_t findNull(StringRef S, size_t EntSize) { - // Optimize the common case. - if (EntSize == 1) - return S.find(0); - for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { const char *B = S.begin() + I; if (std::all_of(B, B + EntSize, [](char C) { return C == 0; })) @@ -865,20 +861,78 @@ return cast_or_null(Parent); } +// This helper function takes a null-terminated string and finds string size and +// hash value. Return value is [uint32 Size, uint32 Hash] packed into uint64. +static inline uint64_t findSizeAndHash(StringRef S) { + size_t DataSize = S.size(); + const uint8_t *Data = reinterpret_cast(S.data()); + + // We are going to calculate simple hash based on DJB hash below. Hash is + // calculated as the same time as we read the string bytes for speedup. + uint32_t Hash = 5381; + + // Here we load and test single bytes until a word boundary is reached. + // That allows loading aligned words to register below. We also starting to + // calculate the string hash here. + while (reinterpret_cast(Data) & (sizeof(uint32_t) - 1)) { + if (!DataSize-- || !*Data) + return (S.size() - DataSize) | ((uint64_t)Hash << 32); + Hash = (Hash << 5) + Hash + *Data; + ++Data; + } + + // Load a word at a time and test if any of bytes is 0-byte. + while (DataSize > sizeof(uint32_t)) { + uint32_t Word = read32(Data); + // This checks if at least one byte of a word is a null byte. + // If we found such case we want to break the loop and continue + // testing the single bytes to find the exact null byte position. + if (((Word - 0x01010101) & ~Word & 0x80808080) != 0) + break; + Data += sizeof(uint32_t); + DataSize -= sizeof(uint32_t); + Hash = (Hash << 5) + Hash + Word; + } + + // Now find the exact position of the null byte. Do not forget to + // update the hash value too. + while (DataSize--) { + Hash = (Hash << 5) + Hash + *Data; + if (!*Data) + return (S.size() - DataSize) | ((uint64_t)Hash << 32); + ++Data; + } + + llvm_unreachable("string is not null terminated"); +} + // Split SHF_STRINGS section. Such section is a sequence of // null-terminated strings. void MergeInputSection::splitStrings(ArrayRef Data, size_t EntSize) { + if (Data.size() < EntSize || Data.back() != 0) + fatal(toString(this) + ": string is too short or not null terminated"); + size_t Off = 0; bool IsAlloc = Flags & SHF_ALLOC; StringRef S = toStringRef(Data); while (!S.empty()) { - size_t End = findNull(S, EntSize); - if (End == StringRef::npos) - fatal(toString(this) + ": string is not null terminated"); - size_t Size = End + EntSize; + uint32_t Hash; + size_t Size; + + if (EntSize == 1) { + uint64_t SizeHash = findSizeAndHash(S); + Size = SizeHash & 0xFFFFFFFF; + Hash = SizeHash >> 32; + } else { + size_t End = findNull(S, EntSize); + if (End == StringRef::npos) + fatal(toString(this) + ": string is not null terminated"); + Size = findNull(S, EntSize) + EntSize; + Hash = xxHash64(S.substr(0, Size)); + } - Pieces.emplace_back(Off, xxHash64(S.substr(0, Size)), !IsAlloc); + Pieces.emplace_back(Off, Hash, !IsAlloc); S = S.substr(Size); Off += Size; } Index: test/ELF/comment-gc.s =================================================================== --- test/ELF/comment-gc.s +++ test/ELF/comment-gc.s @@ -5,7 +5,8 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .comment: -# CHECK-NEXT: foo..LLD 1.0.bar +# CHECK-NEXT: ..LLD 1.0.foo.ba +# CHECK-NEXT: r. .ident "foo" Index: test/ELF/debug-gc.s =================================================================== --- test/ELF/debug-gc.s +++ test/ELF/debug-gc.s @@ -4,11 +4,11 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .debug_str: -# CHECK-NEXT: 0000 41414100 43434300 42424200 AAA.CCC.BBB. +# CHECK-NEXT: 0000 41414100 42424200 43434300 AAA.BBB.CCC. # CHECK: Contents of section .foo: # CHECK-NEXT: 0000 2a000000 # CHECK: Contents of section .debug_info: -# CHECK-NEXT: 0000 00000000 08000000 +# CHECK-NEXT: 0000 00000000 04000000 .globl _start _start: Index: test/ELF/emit-relocs-mergeable-i386.s =================================================================== --- test/ELF/emit-relocs-mergeable-i386.s +++ test/ELF/emit-relocs-mergeable-i386.s @@ -5,7 +5,7 @@ ## Check lf we produce proper relocations when doing merging of SHF_MERGE sections. -## Check addends of relocations are: 0x0, 0x8, 0x8, 0x4 +## Check addends of relocations are: 0x0, 0x4, 0x4, 0x8 # CHECK: Section { # CHECK: Index: # CHECK: Name: .foo @@ -22,11 +22,11 @@ # CHECK-NEXT: AddressAlignment: # CHECK-NEXT: EntrySize: # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 00000000 08000000 08000000 04000000 +# CHECK-NEXT: 0000: 00000000 04000000 04000000 08000000 # CHECK-NEXT: ) # CHECK-NEXT: } -## Check that offsets for AAA is 0x0, for BBB is 0x8 and CCC has offset 0x4. +## Check that offsets for AAA is 0x0, for BBB is 0x4 and CCC has offset 0x8. # CHECK: Section { # CHECK: Index: # CHECK: Name: .strings @@ -43,7 +43,7 @@ # CHECK-NEXT: AddressAlignment: # CHECK-NEXT: EntrySize: # CHECK-NEXT: SectionData ( -# CHECK-NEXT: |AAA.CCC.BBB.| +# CHECK-NEXT: |AAA.BBB.CCC.| # CHECK-NEXT: ) # CHECK-NEXT: } Index: test/ELF/emit-relocs-mergeable.s =================================================================== --- test/ELF/emit-relocs-mergeable.s +++ test/ELF/emit-relocs-mergeable.s @@ -21,16 +21,16 @@ # CHECK-NEXT: AddressAlignment: # CHECK-NEXT: EntrySize: # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 41414100 43434300 42424200 |AAA.CCC.BBB.| +# CHECK-NEXT: 0000: 41414100 42424200 43434300 |AAA.BBB.CCC.| # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK: Relocations [ # CHECK-NEXT: Section {{.*}} .rela.foo { # CHECK-NEXT: 0x201000 R_X86_64_64 .strings 0x0 -# CHECK-NEXT: 0x201008 R_X86_64_64 .strings 0x8 -# CHECK-NEXT: 0x201010 R_X86_64_64 .strings 0x8 -# CHECK-NEXT: 0x201018 R_X86_64_64 .strings 0x4 +# CHECK-NEXT: 0x201008 R_X86_64_64 .strings 0x4 +# CHECK-NEXT: 0x201010 R_X86_64_64 .strings 0x4 +# CHECK-NEXT: 0x201018 R_X86_64_64 .strings 0x8 # CHECK-NEXT: } # CHECK-NEXT: ] Index: test/ELF/merge-string-no-null.s =================================================================== --- test/ELF/merge-string-no-null.s +++ test/ELF/merge-string-no-null.s @@ -5,4 +5,4 @@ .section .rodata.str1.1,"aMS",@progbits,1 .ascii "abc" -// CHECK: string is not null terminated +// CHECK: string is too short or not null terminated Index: test/ELF/string-gc.s =================================================================== --- test/ELF/string-gc.s +++ test/ELF/string-gc.s @@ -14,7 +14,7 @@ // CHECK-NEXT: } // CHECK-NEXT: Symbol { // CHECK-NEXT: Name: s3 -// CHECK-NEXT: Value: 0x200120 +// CHECK-NEXT: Value: 0x200125 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Object (0x1) @@ -23,7 +23,7 @@ // CHECK-NEXT: } // CHECK-NEXT: Symbol { // CHECK-NEXT: Name: s1 -// CHECK-NEXT: Value: 0x200125 +// CHECK-NEXT: Value: 0x200120 // CHECK-NEXT: Size: 0 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Object (0x1)