Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -121,6 +121,36 @@ } template +static typename InputSection::Kind +getSectionType(const typename ELFFile::Elf_Shdr &Sec) { + typedef typename ELFFile::uintX_t uintX_t; + uintX_t Flags = Sec.sh_flags; + if (!(Flags & SHF_MERGE)) + return InputSection::Regular; + if (Flags & SHF_WRITE) + error("Writable SHF_MERGE sections are not supported"); + uintX_t EntSize = Sec.sh_entsize; + if (Sec.sh_size % EntSize) + error("SHF_MERGE section size must be a multiple of sh_entsize"); + + // Don't try to merge if the alignment is larger than the sh_entsize. + // + // If this is not a SHF_STRINGS, we would need to pad after every entity. It + // would be equivalent for the producer of the .o to just set a larger + // sh_entsize. + // + // If this is a SHF_STRINGS, the larger alignment makes sense. Unfortunately + // it would complicate tail merging. This doesn't seem common enough to + // justify the effort. 
+ if (Sec.sh_addralign > EntSize) + return InputSection::Regular; + + if (Flags & SHF_STRINGS) + return InputSection::MergeString; + return InputSection::Merge; +} + +template void elf2::ObjectFile::initializeSections(DenseSet &Comdats) { uint64_t Size = this->ELFObj.getNumSections(); Sections.resize(Size); @@ -170,18 +200,21 @@ error("Relocations pointing to SHF_MERGE are not supported"); break; } - default: { - uintX_t Flags = Sec.sh_flags; - if (Flags & SHF_MERGE && !(Flags & SHF_STRINGS)) { - if (Flags & SHF_WRITE) - error("Writable SHF_MERGE sections are not supported"); - Sections[I] = new (this->Alloc) MergeInputSection(this, &Sec); - } else { + default: + switch (getSectionType(Sec)) { + case InputSection::Regular: Sections[I] = new (this->Alloc) InputSection(this, &Sec); + break; + case InputSection::Merge: + Sections[I] = new (this->Alloc) MergeInputSection(this, &Sec); + break; + case InputSection::MergeString: + Sections[I] = + new (this->Alloc) MergeStringInputSection(this, &Sec); + break; } break; } - } } } Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -33,7 +33,7 @@ ObjectFile *File; public: - enum Kind { Regular, Merge }; + enum Kind { Regular, Merge, MergeString }; Kind SectionKind; InputSectionBase(ObjectFile *File, const Elf_Shdr *Header, @@ -61,7 +61,7 @@ return std::max(Header->sh_addralign, 1); } - uintX_t getOffset(const Elf_Sym &Sym) const; + uintX_t getOffset(const Elf_Sym &Sym); ArrayRef getSectionData() const; }; @@ -70,8 +70,23 @@ InputSectionBase::Discarded(nullptr, nullptr, InputSectionBase::Regular); +template +class MergeInputSectionBase : public InputSectionBase { + typedef InputSectionBase Base; + typedef typename llvm::object::ELFFile::Elf_Shdr Elf_Shdr; + typedef typename llvm::object::ELFFile::uintX_t uintX_t; + +public: + std::vector> Offsets; + MergeInputSectionBase(ObjectFile *File, const Elf_Shdr *Header, + typename Base::Kind 
SectionKind); + uintX_t getOffset(uintX_t Offset); + static bool classof(const InputSectionBase *S); +}; + // This corresponds to a SHF_MERGE section of an input file. -template class MergeInputSection : public InputSectionBase { +template +class MergeInputSection : public MergeInputSectionBase { typedef InputSectionBase Base; typedef typename llvm::object::ELFFile::uintX_t uintX_t; typedef typename llvm::object::ELFFile::Elf_Sym Elf_Sym; @@ -80,7 +95,18 @@ public: MergeInputSection(ObjectFile *F, const Elf_Shdr *Header); static bool classof(const InputSectionBase *S); - uintX_t getOffset(uintX_t Offset) const; +}; + +// This corresponds to a SHF_STRINGS section of an input file. +template +class MergeStringInputSection : public MergeInputSectionBase { + typedef MergeInputSectionBase Base; + typedef typename llvm::object::ELFFile::uintX_t uintX_t; + typedef typename llvm::object::ELFFile::Elf_Shdr Elf_Shdr; + +public: + MergeStringInputSection(ObjectFile *F, const Elf_Shdr *Header); + static bool classof(const InputSectionBase *S); }; // This corresponds to a non SHF_MERGE section of an input file. 
Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -43,10 +43,10 @@ template typename ELFFile::uintX_t -InputSectionBase::getOffset(const Elf_Sym &Sym) const { +InputSectionBase::getOffset(const Elf_Sym &Sym) { if (auto *S = dyn_cast>(this)) return S->OutSecOff + Sym.st_value; - return cast>(this)->getOffset(Sym.st_value); + return cast>(this)->getOffset(Sym.st_value); } template @@ -119,29 +119,68 @@ } template +MergeInputSectionBase::MergeInputSectionBase( + ObjectFile *File, const Elf_Shdr *Header, + typename Base::Kind SectionKind) + : InputSectionBase(File, Header, SectionKind) {} + +template +bool MergeInputSectionBase::classof(const InputSectionBase *S) { + return S->SectionKind == Base::Merge || S->SectionKind == Base::MergeString; +} + +template +typename MergeInputSectionBase::uintX_t +MergeInputSectionBase::getOffset(uintX_t Offset) { + ArrayRef D = this->getSectionData(); + StringRef Data((char *)D.data(), D.size()); + uintX_t Size = Data.size(); + if (Offset >= Size) + error("Entry is past the end of the section"); + + auto I = std::upper_bound( + this->Offsets.begin(), this->Offsets.end(), Offset, + [](const uintX_t &A, const std::pair &B) { + return A < B.first; + }); + size_t End = I == this->Offsets.end() ? 
Data.size() : I->first; + + --I; + uintX_t Start = I->first; + uintX_t Addend = Offset - Start; + uintX_t &Base = I->second; + if (Base != uintX_t(-1)) + return Base + Addend; + + StringRef Entry = Data.substr(Start, End - Start - 1); + OutputSectionBase *OutSec = this->OutSec; + if (isa>(this)) + Base = + static_cast *>(OutSec)->getOffset(Entry); + else + Base = static_cast *>(OutSec)->getOffset(Entry); + + return Base + Addend; +} + +template MergeInputSection::MergeInputSection(ObjectFile *F, const Elf_Shdr *Header) - : InputSectionBase(F, Header, Base::Merge) {} + : MergeInputSectionBase(F, Header, Base::Merge) {} template bool MergeInputSection::classof(const InputSectionBase *S) { return S->SectionKind == Base::Merge; } -// FIXME: Optimize this by keeping an offset for each element. template -typename MergeInputSection::uintX_t -MergeInputSection::getOffset(uintX_t Offset) const { - ArrayRef Data = this->getSectionData(); - uintX_t EntSize = this->Header->sh_entsize; - uintX_t Addend = Offset % EntSize; - Offset -= Addend; - if (Offset + EntSize > Data.size()) - error("Entry is past the end of the section"); - Data = Data.slice(Offset, EntSize); - return static_cast *>(this->OutSec) - ->getOffset(Data) + - Addend; +MergeStringInputSection::MergeStringInputSection(ObjectFile *F, + const Elf_Shdr *Header) + : MergeInputSectionBase(F, Header, Base::MergeString) {} + +template +bool MergeStringInputSection::classof(const InputSectionBase *S) { + return S->SectionKind == Base::MergeString; } namespace lld { @@ -156,9 +195,19 @@ template class InputSection; template class InputSection; +template class MergeInputSectionBase; +template class MergeInputSectionBase; +template class MergeInputSectionBase; +template class MergeInputSectionBase; + template class MergeInputSection; template class MergeInputSection; template class MergeInputSection; template class MergeInputSection; + +template class MergeStringInputSection; +template class MergeStringInputSection; 
+template class MergeStringInputSection; +template class MergeStringInputSection; } } Index: ELF/OutputSections.h =================================================================== --- ELF/OutputSections.h +++ ELF/OutputSections.h @@ -29,6 +29,7 @@ template class StringTableSection; template class InputSection; template class MergeInputSection; +template class MergeStringInputSection; template class OutputSection; template class ObjectFile; template class DefinedRegular; @@ -246,12 +247,27 @@ void addSection(MergeInputSection *S); void writeTo(uint8_t *Buf) override; - unsigned getOffset(ArrayRef Val); + unsigned getOffset(StringRef Val); private: // This map is used to find if we already have an entry for a given value and, // if so, at what offset it is. - llvm::MapVector, uintX_t> Offsets; + llvm::MapVector Offsets; +}; + +template +class MergeStringOutputSection final : public OutputSectionBase { + typedef typename llvm::object::ELFFile::uintX_t uintX_t; + +public: + MergeStringOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + void addSection(MergeStringInputSection *S); + void writeTo(uint8_t *Buf) override; + unsigned getOffset(StringRef Val); + void finalize() override; + +private: + llvm::StringTableBuilder Builder; }; template Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -610,7 +610,7 @@ return cast>(S).Sym.st_value; case SymbolBody::DefinedRegularKind: { const auto &DR = cast>(S); - const InputSectionBase &SC = DR.Section; + InputSectionBase &SC = DR.Section; return SC.OutSec->getVA() + SC.getOffset(DR.Sym); } case SymbolBody::DefinedCommonKind: @@ -664,7 +664,7 @@ Offset += Addend; Addend = 0; } - return VA + cast>(Section)->getOffset(Offset) + + return VA + cast>(Section)->getOffset(Offset) + Addend; } @@ -717,8 +717,8 @@ : OutputSectionBase(Name, sh_type, sh_flags) {} template void MergeOutputSection::writeTo(uint8_t *Buf) { 
- for (const std::pair, uintX_t> &P : Offsets) { - ArrayRef Data = P.first; + for (const std::pair &P : Offsets) { + StringRef Data = P.first; memcpy(Buf, Data.data(), Data.size()); Buf += Data.size(); } @@ -732,12 +732,13 @@ this->Header.sh_addralign = Align; uintX_t Off = this->Header.sh_size; - ArrayRef Data = S->getSectionData(); + ArrayRef D = S->getSectionData(); + StringRef Data((char *)D.data(), D.size()); + uintX_t EntSize = S->getSectionHdr()->sh_entsize; - if (Data.size() % EntSize) - error("SHF_MERGE section size must be a multiple of sh_entsize"); for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) { - auto P = Offsets.insert(std::make_pair(Data.slice(I, EntSize), Off)); + auto P = Offsets.insert(std::make_pair(Data.substr(I, EntSize), Off)); + S->Offsets.push_back(std::make_pair(I, P.first->second)); if (P.second) Off += EntSize; } @@ -745,11 +746,75 @@ } template -unsigned MergeOutputSection::getOffset(ArrayRef Val) { +unsigned MergeOutputSection::getOffset(StringRef Val) { return Offsets.find(Val)->second; } template +MergeStringOutputSection::MergeStringOutputSection(StringRef Name, + uint32_t sh_type, + uintX_t sh_flags) + : OutputSectionBase(Name, sh_type, sh_flags) {} + +static bool isNull(StringRef S) { + for (unsigned I = 0, N = S.size(); I != N; ++I) + if (S[I] != 0) + return false; + return true; +} + +static size_t findNull(StringRef S, unsigned EntSize) { + for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { + if (isNull(S.substr(I, EntSize))) + return I; + } + return StringRef::npos; +} + +template +void MergeStringOutputSection::addSection( + MergeStringInputSection *S) { + S->OutSec = this; + uint32_t Align = S->getAlign(); + if (Align > this->Header.sh_addralign) + this->Header.sh_addralign = Align; + + ArrayRef D = S->getSectionData(); + StringRef Data((char *)D.data(), D.size()); + uintX_t EntSize = S->getSectionHdr()->sh_entsize; + + uintX_t Offset = 0; + while (!Data.empty()) { + size_t End = findNull(Data, EntSize); 
+ if (End == StringRef::npos) + error("String is not null terminated"); + StringRef Entry = Data.substr(0, End + EntSize - 1); + Builder.add(Entry); + S->Offsets.push_back(std::make_pair(Offset, -1)); + uintX_t Size = End + EntSize; + Data = Data.substr(Size); + Offset += Size; + } +} + +template +void MergeStringOutputSection::writeTo(uint8_t *Buf) { + StringRef Data = Builder.data(); + // FIXME: Add a "raw" mode to the string table builder. + memcpy(Buf, Data.data() + 1, Data.size() - 1); +} + +template +unsigned MergeStringOutputSection::getOffset(StringRef Val) { + return Builder.getOffset(Val) - 1; +} + +template void MergeStringOutputSection::finalize() { + Builder.finalize(StringTableBuilder::ELF); + this->Header.sh_size = Builder.data().size() - 1; +} + +template StringTableSection::StringTableSection(StringRef Name, bool Dynamic) : OutputSectionBase(Name, llvm::ELF::SHT_STRTAB, Dynamic ? (uintX_t)llvm::ELF::SHF_ALLOC : 0), @@ -903,7 +968,7 @@ ESym->st_shndx = SHN_ABS; VA = Sym.st_value; } else { - const InputSectionBase *Section = File->getSection(Sym); + InputSectionBase *Section = File->getSection(Sym); if (!Section->isLive()) continue; const OutputSectionBase *OutSec = Section->OutSec; @@ -1047,6 +1112,11 @@ template class MergeOutputSection; template class MergeOutputSection; +template class MergeStringOutputSection; +template class MergeStringOutputSection; +template class MergeStringOutputSection; +template class MergeStringOutputSection; + template class StringTableSection; template class StringTableSection; template class StringTableSection; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -72,6 +72,7 @@ SpecificBumpPtrAllocator> SecAlloc; SpecificBumpPtrAllocator> MSecAlloc; + SpecificBumpPtrAllocator> MSSecAlloc; BumpPtrAllocator Alloc; std::vector *> OutputSections; unsigned getNumSections() const { return OutputSections.size() + 1; } @@ -442,6 +443,9 @@ if (IS) 
Sec = new (SecAlloc.Allocate()) OutputSection(Key.Name, Key.Type, Key.Flags); + else if (isa>(C)) + Sec = new (MSSecAlloc.Allocate()) + MergeStringOutputSection(Key.Name, Key.Type, Key.Flags); else Sec = new (MSecAlloc.Allocate()) MergeOutputSection(Key.Name, Key.Type, Key.Flags); @@ -450,6 +454,8 @@ } if (IS) static_cast *>(Sec)->addSection(IS); + else if (auto *SS = dyn_cast>(C)) + static_cast *>(Sec)->addSection(SS); else static_cast *>(Sec) ->addSection(cast>(C)); Index: test/elf2/merge-string-align.s =================================================================== --- /dev/null +++ test/elf2/merge-string-align.s @@ -0,0 +1,39 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: ld.lld2 %t.o -o %t.so -shared +// RUN: llvm-readobj -s %t.so | FileCheck %s + + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 + .asciz "foo" + + .section .rodata.str1.1,"aMS",@progbits,1 + .asciz "foo" + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x120 +// CHECK-NEXT: Offset: 0x120 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 16 + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x124 +// CHECK-NEXT: Offset: 0x124 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 1 Index: test/elf2/merge-string-error.s =================================================================== --- /dev/null +++ test/elf2/merge-string-error.s @@ -0,0 +1,11 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: not ld.lld2 %t.o -o %t.so -shared 2>&1 | FileCheck %s + + .section 
.rodata.str1.1,"aMS",@progbits,1 + .asciz "abc" + + .text + .long .rodata.str1.1 + 4 + +// CHECK: Entry is past the end of the section Index: test/elf2/merge-string-no-null.s =================================================================== --- /dev/null +++ test/elf2/merge-string-no-null.s @@ -0,0 +1,8 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: not ld.lld2 %t.o -o %t.so -shared 2>&1 | FileCheck %s + + .section .rodata.str1.1,"aMS",@progbits,1 + .ascii "abc" + +// CHECK: String is not null terminated Index: test/elf2/merge-string.s =================================================================== --- /dev/null +++ test/elf2/merge-string.s @@ -0,0 +1,65 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: ld.lld2 %t.o -o %t.so -shared +// RUN: llvm-readobj -s -section-data -t %t.so | FileCheck %s + + .section .rodata.str1.1,"aMS",@progbits,1 + .asciz "abc" +foo: + .ascii "a" +bar: + .asciz "bc" + .asciz "bc" + + .section .rodata.str2.2,"aMS",@progbits,2 + .align 2 +zed: + .short 20 + .short 0 + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x120 +// CHECK-NEXT: Offset: 0x120 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 1 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 61626300 |abc.| +// CHECK-NEXT: ) + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x124 +// CHECK-NEXT: Offset: 0x124 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 2 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: SectionData ( +// 
CHECK-NEXT: 0000: 14000000 |....| +// CHECK-NEXT: ) + + +// CHECK: Name: bar +// CHECK-NEXT: Value: 0x121 + +// CHECK: Name: foo +// CHECK-NEXT: Value: 0x120 + +// CHECK: Name: zed +// CHECK-NEXT: Value: 0x124 +// CHECK-NEXT: Size: 0