Index: lld/ELF/InputSection.h =================================================================== --- lld/ELF/InputSection.h +++ lld/ELF/InputSection.h @@ -257,6 +257,7 @@ SyntheticSection *getParent() const; private: + void splitStrings1(ArrayRef A); void splitStrings(ArrayRef A, size_t Size); void splitNonStrings(ArrayRef A, size_t Size); Index: lld/ELF/InputSection.cpp =================================================================== --- lld/ELF/InputSection.cpp +++ lld/ELF/InputSection.cpp @@ -849,10 +849,6 @@ } static size_t findNull(StringRef S, size_t EntSize) { - // Optimize the common case. - if (EntSize == 1) - return S.find(0); - for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { const char *B = S.begin() + I; if (std::all_of(B, B + EntSize, [](char C) { return C == 0; })) @@ -865,6 +861,21 @@ return cast_or_null(Parent); } +void MergeInputSection::splitStrings1(ArrayRef Data) { + size_t Off = 0; + bool IsAlloc = Flags & SHF_ALLOC; + StringRef S = toStringRef(Data); + + while (!S.empty()) { + uint64_t Hash; + size_t Size = strlen_xxHash64(S.data(), Hash); + + Pieces.emplace_back(Off, Hash, !IsAlloc); + S = S.substr(Size + 1); + Off += Size + 1; + } +} + // Split SHF_STRINGS section. Such section is a sequence of // null-terminated strings. void MergeInputSection::splitStrings(ArrayRef Data, size_t EntSize) { @@ -918,10 +929,14 @@ void MergeInputSection::splitIntoPieces() { assert(Pieces.empty()); - if (Flags & SHF_STRINGS) - splitStrings(Data, Entsize); - else + if (Flags & SHF_STRINGS) { + if (Entsize == 1) + splitStrings1(Data); + else + splitStrings(Data, Entsize); + } else { splitNonStrings(Data, Entsize); + } OffsetMap.reserve(Pieces.size()); for (size_t I = 0, E = Pieces.size(); I != E; ++I) Index: llvm/include/llvm/Support/xxhash.h =================================================================== --- llvm/include/llvm/Support/xxhash.h +++ llvm/include/llvm/Support/xxhash.h @@ -42,6 +42,7 @@ namespace llvm { uint64_t xxHash64(llvm::StringRef Data); +size_t strlen_xxHash64(const char *P, uint64_t &H); } #endif Index: llvm/lib/Support/xxhash.cpp =================================================================== --- llvm/lib/Support/xxhash.cpp +++ llvm/lib/Support/xxhash.cpp @@ -132,3 +132,83 @@ return H64; } + +// This function does strlen() while computing xxhash. +size_t llvm::strlen_xxHash64(const char *P, uint64_t &H) { + const char *Q = P; + size_t Remaining; + + uint64_t V0 = PRIME64_1 + PRIME64_2; + uint64_t V1 = PRIME64_2; + uint64_t V2 = 0; + uint64_t V3 = -PRIME64_1; + + for (;;) { + const uint64_t *R = (const uint64_t *)Q; + if (uint64_t X = (R[0] - 0x0101010101010101) & ~R[0] & 0x8080808080808080) { + Remaining = (__builtin_ffsl(X) >> 3) - 1; + break; + } + + if (uint64_t X = (R[1] - 0x0101010101010101) & ~R[1] & 0x8080808080808080) { + Remaining = (__builtin_ffsl(X) >> 3) + 7; + break; + } + + if (uint64_t X = (R[2] - 0x0101010101010101) & ~R[2] & 0x8080808080808080) { + Remaining = (__builtin_ffsl(X) >> 3) + 15; + break; + } + + if (uint64_t X = (R[3] - 0x0101010101010101) & ~R[3] & 0x8080808080808080) { + Remaining = (__builtin_ffsl(X) >> 3) + 23; + break; + } + + V0 = round(V0, R[0]); + V1 = round(V1, R[1]); + V2 = round(V2, R[2]); + V3 = round(V3, R[3]); + Q += 32; + } + + if (P == Q) { + H = PRIME64_5; + } else { + H = rotl64(V0, 1) + rotl64(V1, 7) + rotl64(V2, 12) + rotl64(V3, 18); + H = mergeRound(H, V0); + H = mergeRound(H, V1); + H = mergeRound(H, V2); + H = mergeRound(H, V3); + } + + const char *End = Q + Remaining; + size_t Len = End - P; + + H += Len; + + while (Q + 8 <= End) { + H ^= round(0, *(const uint64_t *)Q); + H = rotl64(H, 27) * PRIME64_1 + PRIME64_4; + Q += 8; + } + + if (Q + 4 <= End) { + H ^= (uint64_t)(*(const uint32_t *)Q) * PRIME64_1; + H = rotl64(H, 23) * PRIME64_2 + PRIME64_3; + Q += 4; + } + + while (Q < End) { + H ^= *Q * PRIME64_5; + H = rotl64(H, 11) * PRIME64_1; + ++Q; + } + + H ^= H >> 33; + H *= PRIME64_2; + H ^= H >> 29; + H *= PRIME64_3; + H ^= H >> 32; + return Len; +}