Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -111,6 +111,28 @@ } template +static typename InputSection::Kind +getSectionType(const typename ELFFile::Elf_Shdr &Sec) { + typedef typename ELFFile::uintX_t uintX_t; + uintX_t Flags = Sec.sh_flags; + if (!(Flags & SHF_MERGE)) + return InputSection::Regular; + if (Flags & SHF_WRITE) + error("Writable SHF_MERGE sections are not supported"); + uintX_t EntSize = Sec.sh_entsize; + if (Sec.sh_size % EntSize) + error("SHF_MERGE section size must be a multiple of sh_entsize"); + + // FIXME: explain. NOTE(review): presumably an alignment larger than the entry size cannot be preserved once entries are merged, so such sections are kept regular; confirm. + if (Sec.sh_addralign > EntSize) + return InputSection::Regular; + + if (Flags & SHF_STRINGS) + return InputSection::MergeString; + return InputSection::Merge; +} + +template void elf2::ObjectFile::initializeSections(DenseSet &Comdats) { uint64_t Size = this->ELFObj.getNumSections(); Sections.resize(Size); @@ -160,18 +182,21 @@ error("Relocations pointing to SHF_MERGE are not supported"); break; } - default: { - uintX_t Flags = Sec.sh_flags; - if (Flags & SHF_MERGE && !(Flags & SHF_STRINGS)) { - if (Flags & SHF_WRITE) - error("Writable SHF_MERGE sections are not supported"); - Sections[I] = new (this->Alloc) MergeInputSection(this, &Sec); - } else { + default: + switch (getSectionType(Sec)) { + case InputSection::Regular: Sections[I] = new (this->Alloc) InputSection(this, &Sec); + break; + case InputSection::Merge: + Sections[I] = new (this->Alloc) MergeInputSection(this, &Sec); + break; + case InputSection::MergeString: + Sections[I] = + new (this->Alloc) MergeStringInputSection(this, &Sec); + break; } break; } - } } } Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -32,7 +32,7 @@ ObjectFile *File; public: - enum Kind { Regular, Merge }; + enum Kind { Regular, Merge, MergeString }; Kind SectionKind; InputSectionBase(ObjectFile *File, const 
Elf_Shdr *Header, @@ -77,6 +77,19 @@ uintX_t getOffset(uintX_t Offset) const; }; +// This corresponds to a SHF_MERGE section of an input file that also has SHF_STRINGS set. +template +class MergeStringInputSection : public InputSectionBase { + typedef InputSectionBase Base; + typedef typename llvm::object::ELFFile::uintX_t uintX_t; + typedef typename llvm::object::ELFFile::Elf_Shdr Elf_Shdr; + +public: + MergeStringInputSection(ObjectFile *F, const Elf_Shdr *Header); + static bool classof(const InputSectionBase *S); + uintX_t getOffset(uintX_t Offset) const; +}; + // This corresponds to a non SHF_MERGE section of an input file. template class InputSection : public InputSectionBase { typedef InputSectionBase Base; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -46,7 +46,9 @@ InputSectionBase::getOffset(const Elf_Sym &Sym) const { if (auto *S = dyn_cast>(this)) return S->OutSecOff + Sym.st_value; - return cast>(this)->getOffset(Sym.st_value); + if (auto *S = dyn_cast>(this)) + return S->getOffset(Sym.st_value); + return cast>(this)->getOffset(Sym.st_value); } template @@ -133,17 +135,75 @@ typename MergeInputSection::uintX_t MergeInputSection::getOffset(uintX_t Offset) const { ArrayRef Data = this->getSectionData(); + if (Offset >= Data.size()) + error("Entry is past the end of the section"); uintX_t EntSize = this->Header->sh_entsize; uintX_t Addend = Offset % EntSize; Offset -= Addend; - if (Offset + EntSize > Data.size()) - error("Entry is past the end of the section"); Data = Data.slice(Offset, EntSize); return static_cast *>(this->OutSec) ->getOffset(Data) + Addend; } +template +MergeStringInputSection::MergeStringInputSection(ObjectFile *F, + const Elf_Shdr *Header) + : InputSectionBase(F, Header, Base::MergeString) {} + +template +bool MergeStringInputSection::classof(const InputSectionBase *S) { + return S->SectionKind == Base::MergeString; +} + +static bool isNull(StringRef S) { + for 
(unsigned I = 0, N = S.size(); I != N; ++I) + if (S[I] != 0) + return false; + return true; +} + +static size_t findNull(StringRef S, unsigned EntSize) { + for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { + if (isNull(S.substr(I, EntSize))) + return I; + } + return StringRef::npos; +} + +static size_t rfindNull(StringRef S, unsigned EntSize) { + for (unsigned I = S.size(); I;) { + I -= EntSize; + if (isNull(S.substr(I, EntSize))) + return I; + } + return StringRef::npos; +} + +template +typename MergeStringInputSection::uintX_t +MergeStringInputSection::getOffset(uintX_t Offset) const { + ArrayRef D = this->getSectionData(); + StringRef Data((char *)D.data(), D.size()); + uintX_t Size = Data.size(); + if (Offset >= Size) + error("Entry is past the end of the section"); + + uintX_t EntSize = this->Header->sh_entsize; + + size_t Aux = + rfindNull(Data.slice(0, RoundUpToAlignment(Offset, EntSize)), EntSize); + uintX_t Start = Aux == StringRef::npos ? 0 : Aux + EntSize; + + Data = Data.substr(Start); + + size_t End = findNull(Data, EntSize); + StringRef Entry = Data.substr(0, End + EntSize - 1); + return static_cast *>(this->OutSec) + ->getOffset(Entry) + + (Offset - Start); +} + namespace lld { namespace elf2 { template class InputSectionBase; @@ -160,5 +220,10 @@ template class MergeInputSection; template class MergeInputSection; template class MergeInputSection; + +template class MergeStringInputSection; +template class MergeStringInputSection; +template class MergeStringInputSection; +template class MergeStringInputSection; } } Index: ELF/OutputSections.h =================================================================== --- ELF/OutputSections.h +++ ELF/OutputSections.h @@ -29,6 +29,7 @@ template class StringTableSection; template class InputSection; template class MergeInputSection; +template class MergeStringInputSection; template class OutputSection; template class ObjectFile; template class DefinedRegular; @@ -245,6 +246,21 @@ }; template +class 
MergeStringOutputSection final : public OutputSectionBase { + typedef typename llvm::object::ELFFile::uintX_t uintX_t; + +public: + MergeStringOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags); + void addSection(MergeStringInputSection *S); + void writeTo(uint8_t *Buf) override; + unsigned getOffset(StringRef Val); + void finalize() override; + +private: + llvm::StringTableBuilder Builder; +}; + +template class InterpSection final : public OutputSectionBase { public: InterpSection(); Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -532,8 +532,12 @@ Offset += Addend; Addend = 0; } - return VA + cast>(Section)->getOffset(Offset) + - Addend; + uintX_t SecOffset; + if (auto *MS = dyn_cast>(Section)) + SecOffset = MS->getOffset(Offset); + else + SecOffset = cast>(Section)->getOffset(Offset); + return VA + SecOffset + Addend; } // Returns true if a symbol can be replaced at load-time by a symbol @@ -602,8 +606,6 @@ uintX_t Off = this->Header.sh_size; ArrayRef Data = S->getSectionData(); uintX_t EntSize = S->getSectionHdr()->sh_entsize; - if (Data.size() % EntSize) - error("SHF_MERGE section size must be a multiple of sh_entsize"); for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) { auto P = Offsets.insert(std::make_pair(Data.slice(I, EntSize), Off)); if (P.second) @@ -618,6 +620,68 @@ } template +MergeStringOutputSection::MergeStringOutputSection(StringRef Name, + uint32_t sh_type, + uintX_t sh_flags) + : OutputSectionBase(Name, sh_type, sh_flags) {} + +static bool isNull(StringRef S) { + for (unsigned I = 0, N = S.size(); I != N; ++I) + if (S[I] != 0) + return false; + return true; +} + +static size_t findNull(StringRef S, unsigned EntSize) { + for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { + if (isNull(S.substr(I, EntSize))) + return I; + } + return StringRef::npos; +} + +template +void MergeStringOutputSection::addSection( + 
MergeStringInputSection *S) { + // FIXME: dup. NOTE(review): presumably this OutSec and alignment bookkeeping repeats what the other output sections do in addSection; confirm. + S->OutSec = this; + uint32_t Align = S->getAlign(); + if (Align > this->Header.sh_addralign) + this->Header.sh_addralign = Align; + + // FIXME: dup of the section data to StringRef conversion in MergeStringInputSection::getOffset. NOTE(review): the error text in the loop below looks like a placeholder. + ArrayRef D = S->getSectionData(); + StringRef Data((char *)D.data(), D.size()); + uintX_t EntSize = S->getSectionHdr()->sh_entsize; + + while (!Data.empty()) { + size_t End = findNull(Data, EntSize); + if (End == StringRef::npos) + error("foo"); + StringRef Entry = Data.substr(0, End + EntSize - 1); + Builder.add(Entry); + Data = Data.substr(End + EntSize); + } +} + +template +void MergeStringOutputSection::writeTo(uint8_t *Buf) { + StringRef Data = Builder.data(); + // FIXME: Add a "raw" mode to the string table builder. Until then the first byte of the builder data is skipped here, and getOffset and finalize subtract one to match. + memcpy(Buf, Data.data() + 1, Data.size() - 1); +} + +template +unsigned MergeStringOutputSection::getOffset(StringRef Val) { + return Builder.getOffset(Val) - 1; +} + +template void MergeStringOutputSection::finalize() { + Builder.finalize(StringTableBuilder::ELF); + this->Header.sh_size = Builder.data().size() - 1; +} + +template StringTableSection::StringTableSection(StringRef Name, bool Dynamic) : OutputSectionBase(Name, llvm::ELF::SHT_STRTAB, Dynamic ? 
(uintX_t)llvm::ELF::SHF_ALLOC : 0), @@ -878,6 +942,11 @@ template class MergeOutputSection; template class MergeOutputSection; +template class MergeStringOutputSection; +template class MergeStringOutputSection; +template class MergeStringOutputSection; +template class MergeStringOutputSection; + template class StringTableSection; template class StringTableSection; template class StringTableSection; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -72,6 +72,7 @@ SpecificBumpPtrAllocator> SecAlloc; SpecificBumpPtrAllocator> MSecAlloc; + SpecificBumpPtrAllocator> MSSecAlloc; BumpPtrAllocator Alloc; std::vector *> OutputSections; unsigned getNumSections() const { return OutputSections.size() + 1; } @@ -438,6 +439,9 @@ if (IS) Sec = new (SecAlloc.Allocate()) OutputSection(Key.Name, Key.Type, Key.Flags); + else if (isa>(C)) + Sec = new (MSSecAlloc.Allocate()) + MergeStringOutputSection(Key.Name, Key.Type, Key.Flags); else Sec = new (MSecAlloc.Allocate()) MergeOutputSection(Key.Name, Key.Type, Key.Flags); @@ -446,6 +450,8 @@ } if (IS) static_cast *>(Sec)->addSection(IS); + else if (auto *SS = dyn_cast>(C)) + static_cast *>(Sec)->addSection(SS); else static_cast *>(Sec) ->addSection(cast>(C)); Index: test/elf2/merge-string-align.s =================================================================== --- /dev/null +++ test/elf2/merge-string-align.s @@ -0,0 +1,39 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: ld.lld2 %t.o -o %t.so -shared +// RUN: llvm-readobj -s %t.so | FileCheck %s + + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 + .asciz "foo" + + .section .rodata.str1.1,"aMS",@progbits,1 + .asciz "foo" + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x120 +// CHECK-NEXT: 
Offset: 0x120 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 16 + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x124 +// CHECK-NEXT: Offset: 0x124 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 1 Index: test/elf2/merge-string-error.s =================================================================== --- /dev/null +++ test/elf2/merge-string-error.s @@ -0,0 +1,11 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: not ld.lld2 %t.o -o %t.so -shared 2>&1 | FileCheck %s + + .section .rodata.str1.1,"aMS",@progbits,1 + .asciz "abc" + + .text + .long .rodata.str1.1 + 4 + +// CHECK: Entry is past the end of the section Index: test/elf2/merge-string.s =================================================================== --- /dev/null +++ test/elf2/merge-string.s @@ -0,0 +1,64 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: ld.lld2 %t.o -o %t.so -shared +// RUN: llvm-readobj -s -section-data -t %t.so | FileCheck %s + + .section .rodata.str1.1,"aMS",@progbits,1 + .asciz "abc" +foo: + .ascii "a" +bar: + .asciz "bc" + + .section .rodata.str2.2,"aMS",@progbits,2 + .align 2 +zed: + .short 20 + .short 0 + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x120 +// CHECK-NEXT: Offset: 0x120 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 1 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 61626300 |abc.| +// CHECK-NEXT: ) + +// CHECK: Name: .rodata +// CHECK-NEXT: Type: 
SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_MERGE +// CHECK-NEXT: SHF_STRINGS +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x124 +// CHECK-NEXT: Offset: 0x124 +// CHECK-NEXT: Size: 4 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 2 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 14000000 |....| +// CHECK-NEXT: ) + + +// CHECK: Name: bar +// CHECK-NEXT: Value: 0x121 + +// CHECK: Name: foo +// CHECK-NEXT: Value: 0x120 + +// CHECK: Name: zed +// CHECK-NEXT: Value: 0x124 +// CHECK-NEXT: Size: 0