Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -200,16 +200,14 @@ // be found by looking at the next one). struct SectionPiece { SectionPiece(size_t Off, uint32_t Hash, bool Live) - : InputOff(Off), Hash(Hash), OutputOff(0), - Live(Live || !Config->GcSections) {} + : InputOff(Off), Hash(Hash >> 1), Live(Live || !Config->GcSections) {} uint32_t InputOff; - uint32_t Hash; - int64_t OutputOff : 63; - uint64_t Live : 1; + uint32_t Hash : 31; + uint32_t Live : 1; }; -static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big"); +static_assert(sizeof(SectionPiece) == 8, "SectionPiece is too big"); // This corresponds to a SHF_MERGE section of an input file. class MergeInputSection : public InputSectionBase { @@ -236,6 +234,7 @@ // Splittable sections are handled as a sequence of data // rather than a single large blob of data. std::vector Pieces; + std::vector OutputOffsets; llvm::DenseMap OffsetMap; // Returns I'th piece's data. This function is very hot when Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -920,6 +920,7 @@ splitNonStrings(Data, Entsize); OffsetMap.reserve(Pieces.size()); + OutputOffsets.resize(Pieces.size()); for (size_t I = 0, E = Pieces.size(); I != E; ++I) OffsetMap[Pieces[I].InputOff] = I; @@ -942,7 +943,8 @@ } // Do binary search to get a section piece at a given input offset. -static SectionPiece *findSectionPiece(MergeInputSection *Sec, uint64_t Offset) { +static int findSectionPieceIndex(const MergeInputSection *Sec, + uint64_t Offset) { if (Sec->Data.size() <= Offset) fatal(toString(Sec) + ": entry is past the end of the section"); @@ -951,7 +953,7 @@ Sec->Pieces.begin(), Sec->Pieces.end(), Offset, [](const uint64_t &A, const SectionPiece &B) { return A < B.InputOff; }); --I; - return &*I; + return std::distance(Sec->Pieces.begin(), I); } SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) { @@ -962,7 +964,7 @@ // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to search from the original section piece vector. - return findSectionPiece(this, Offset); + return &Pieces[findSectionPieceIndex(this, Offset)]; } // Returns the offset in an output section for a given input offset. @@ -972,14 +974,13 @@ // Find a string starting at a given offset. auto It = OffsetMap.find(Offset); if (It != OffsetMap.end()) - return Pieces[It->second].OutputOff; + return OutputOffsets[It->second]; // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to search from the original section piece vector. - const SectionPiece &Piece = - *findSectionPiece(const_cast(this), Offset); - uint64_t Addend = Offset - Piece.InputOff; - return Piece.OutputOff + Addend; + int Index = findSectionPieceIndex(this, Offset); + uint64_t Addend = Offset - Pieces[Index].InputOff; + return OutputOffsets[Index] + Addend; } template InputSection::InputSection(ObjFile &, const ELF32LE::Shdr &, Index: ELF/SyntheticSections.h =================================================================== --- ELF/SyntheticSections.h +++ ELF/SyntheticSections.h @@ -723,7 +723,8 @@ // If we use lower bits, it significantly increases the probability of // hash collisons. size_t getShardId(uint32_t Hash) { - return Hash >> (32 - llvm::countTrailingZeros(NumShards)); + assert((Hash & (1 << 31)) == 0); + return Hash >> (31 - llvm::countTrailingZeros(NumShards)); } // Section size Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -2468,7 +2468,7 @@ for (MergeInputSection *Sec : Sections) for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); + Sec->OutputOffsets[I] = Builder.getOffset(Sec->getData(I)); } void MergeNoTailSection::writeTo(uint8_t *Buf) { @@ -2502,7 +2502,7 @@ for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { size_t ShardId = getShardId(Sec->Pieces[I].Hash); if ((ShardId & (Concurrency - 1)) == ThreadId && Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I)); + Sec->OutputOffsets[I] = Shards[ShardId].add(Sec->getData(I)); } } }); @@ -2523,8 +2523,7 @@ parallelForEach(Sections, [&](MergeInputSection *Sec) { for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff += - ShardOffsets[getShardId(Sec->Pieces[I].Hash)]; + Sec->OutputOffsets[I] += ShardOffsets[getShardId(Sec->Pieces[I].Hash)]; }); }