Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -199,17 +199,14 @@ // have to be as compact as possible, which is why we don't store the size (can // be found by looking at the next one). struct SectionPiece { - SectionPiece(size_t Off, uint32_t Hash, bool Live) - : InputOff(Off), Hash(Hash), OutputOff(0), - Live(Live || !Config->GcSections) {} + SectionPiece(bool Live) : OutputOff(0), Live(Live || !Config->GcSections) {} + SectionPiece() = default; - uint32_t InputOff; - uint32_t Hash; int64_t OutputOff : 63; uint64_t Live : 1; }; -static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big"); +static_assert(sizeof(SectionPiece) == 8, "SectionPiece is too big"); // This corresponds to a SHF_MERGE section of an input file. class MergeInputSection : public InputSectionBase { @@ -235,6 +232,8 @@ // Splittable sections are handled as a sequence of data // rather than a single large blob of data. + std::vector<uint32_t> InputOffsets; + std::vector<uint32_t> Hashes; std::vector<SectionPiece> Pieces; llvm::DenseMap<uint32_t, uint32_t> OffsetMap; @@ -242,10 +241,10 @@ // string merging is enabled, so we want to inline. LLVM_ATTRIBUTE_ALWAYS_INLINE llvm::CachedHashStringRef getData(size_t I) const { - size_t Begin = Pieces[I].InputOff; + size_t Begin = InputOffsets[I]; size_t End = - (Pieces.size() - 1 == I) ? Data.size() : Pieces[I + 1].InputOff; - return {toStringRef(Data.slice(Begin, End - Begin)), Pieces[I].Hash}; + (InputOffsets.size() - 1 == I) ? Data.size() : InputOffsets[I + 1]; + return {toStringRef(Data.slice(Begin, End - Begin)), Hashes[I]}; } // Returns the SectionPiece at a given input section offset. Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -866,7 +866,6 @@ // null-terminated strings. 
void MergeInputSection::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) { size_t Off = 0; - bool IsAlloc = Flags & SHF_ALLOC; StringRef S = toStringRef(Data); while (!S.empty()) { @@ -875,7 +874,8 @@ fatal(toString(this) + ": string is not null terminated"); size_t Size = End + EntSize; - Pieces.emplace_back(Off, xxHash64(S.substr(0, Size)), !IsAlloc); + Hashes.push_back(xxHash64(S.substr(0, Size))); + InputOffsets.push_back(Off); S = S.substr(Size); Off += Size; } @@ -887,11 +887,11 @@ size_t EntSize) { size_t Size = Data.size(); assert((Size % EntSize) == 0); - bool IsAlloc = Flags & SHF_ALLOC; - for (size_t I = 0; I != Size; I += EntSize) - Pieces.emplace_back(I, xxHash64(toStringRef(Data.slice(I, EntSize))), - !IsAlloc); + for (size_t I = 0; I != Size; I += EntSize) { + Hashes.push_back(xxHash64(toStringRef(Data.slice(I, EntSize)))); + InputOffsets.push_back(I); + } } template <class ELFT> @@ -913,16 +913,20 @@ // Note that this function is called from parallelForEach. This must be // thread-safe (i.e. no memory allocation from the pools). void MergeInputSection::splitIntoPieces() { - assert(Pieces.empty()); + assert(InputOffsets.empty()); if (Flags & SHF_STRINGS) splitStrings(Data, Entsize); else splitNonStrings(Data, Entsize); - OffsetMap.reserve(Pieces.size()); - for (size_t I = 0, E = Pieces.size(); I != E; ++I) - OffsetMap[Pieces[I].InputOff] = I; + bool IsAlloc = Flags & SHF_ALLOC; + OffsetMap.reserve(InputOffsets.size()); + Pieces.resize(InputOffsets.size()); + for (size_t I = 0, E = InputOffsets.size(); I != E; ++I) { + OffsetMap[InputOffsets[I]] = I; + Pieces[I] = {!IsAlloc}; + } if (Config->GcSections && (Flags & SHF_ALLOC)) for (uint32_t Off : LiveOffsets) @@ -943,16 +947,17 @@ } // Do binary search to get a section piece at a given input offset. 
-static SectionPiece *findSectionPiece(MergeInputSection *Sec, uint64_t Offset) { +static int findSectionPieceIndex(const MergeInputSection *Sec, + uint64_t Offset) { if (Sec->Data.size() <= Offset) fatal(toString(Sec) + ": entry is past the end of the section"); // Find the element this offset points to. auto I = fastUpperBound( - Sec->Pieces.begin(), Sec->Pieces.end(), Offset, - [](const uint64_t &A, const SectionPiece &B) { return A < B.InputOff; }); + Sec->InputOffsets.begin(), Sec->InputOffsets.end(), Offset, + [](const uint64_t &A, const uint32_t &B) { return A < B; }); --I; - return &*I; + return std::distance(Sec->InputOffsets.begin(), I); } SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) { @@ -963,7 +968,8 @@ // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to search from the original section piece vector. - return findSectionPiece(this, Offset); + int Index = findSectionPieceIndex(this, Offset); + return &Pieces[Index]; } // Returns the offset in an output section for a given input offset. @@ -977,10 +983,9 @@ // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to search from the original section piece vector. - const SectionPiece &Piece = - *findSectionPiece(const_cast<MergeInputSection *>(this), Offset); - uint64_t Addend = Offset - Piece.InputOff; - return Piece.OutputOff + Addend; + int Index = findSectionPieceIndex(this, Offset); + uint64_t Addend = Offset - InputOffsets[Index]; + return Pieces[Index].OutputOff + Addend; } template InputSection::InputSection(ObjFile<ELF32LE> &, const ELF32LE::Shdr &, Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -2440,10 +2440,13 @@ // finalize() fixed tail-optimized strings, so we can now get // offsets of strings. Get an offset for each string and save it // to a corresponding StringPiece for easy access. 
- for (MergeInputSection *Sec : Sections) + for (MergeInputSection *Sec : Sections) { for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); + Sec->Hashes.clear(); + Sec->Hashes.shrink_to_fit(); + } } void MergeNoTailSection::writeTo(uint8_t *Buf) { @@ -2475,7 +2478,7 @@ parallelForEachN(0, Concurrency, [&](size_t ThreadId) { for (MergeInputSection *Sec : Sections) { for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { - size_t ShardId = getShardId(Sec->Pieces[I].Hash); + size_t ShardId = getShardId(Sec->Hashes[I]); if ((ShardId & (Concurrency - 1)) == ThreadId && Sec->Pieces[I].Live) Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I)); } @@ -2498,8 +2501,9 @@ parallelForEach(Sections, [&](MergeInputSection *Sec) { for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff += - ShardOffsets[getShardId(Sec->Pieces[I].Hash)]; + Sec->Pieces[I].OutputOff += ShardOffsets[getShardId(Sec->Hashes[I])]; + Sec->Hashes.clear(); + Sec->Hashes.shrink_to_fit(); }); }