Index: llvm/trunk/test/tools/llvm-objcopy/Inputs/ungzip.py =================================================================== --- llvm/trunk/test/tools/llvm-objcopy/Inputs/ungzip.py +++ llvm/trunk/test/tools/llvm-objcopy/Inputs/ungzip.py @@ -0,0 +1,5 @@ +import gzip +import sys + +with gzip.open(sys.argv[1], 'rb') as f: + sys.stdout.write(f.read()) Index: llvm/trunk/test/tools/llvm-objcopy/auto-remove-shndx.test =================================================================== --- llvm/trunk/test/tools/llvm-objcopy/auto-remove-shndx.test +++ llvm/trunk/test/tools/llvm-objcopy/auto-remove-shndx.test @@ -0,0 +1,5 @@ +# RUN: python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t +# RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t %t2 +# RUN: llvm-readobj -sections %t2 | FileCheck --check-prefix=SECS %s + +# SECS-NOT: Name: .symtab_shndx Index: llvm/trunk/test/tools/llvm-objcopy/many-sections.test =================================================================== --- llvm/trunk/test/tools/llvm-objcopy/many-sections.test +++ llvm/trunk/test/tools/llvm-objcopy/many-sections.test @@ -0,0 +1,53 @@ +RUN: python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t +RUN: llvm-objcopy %t %t2 +RUN: llvm-readobj -file-headers %t2 | FileCheck --check-prefix=EHDR %s +RUN: llvm-readobj -sections %t2 | FileCheck --check-prefix=SECS %s +RUN: llvm-readobj -symbols %t2 | grep "Symbol {" | wc -l | FileCheck --check-prefix=SYMS %s + +EHDR: Format: ELF64-x86-64 +EHDR-NEXT: Arch: x86_64 +EHDR-NEXT: AddressSize: 64bit +EHDR-NEXT: LoadName: +EHDR-NEXT: ElfHeader { +EHDR-NEXT: Ident { +EHDR-NEXT: Magic: (7F 45 4C 46) +EHDR-NEXT: Class: 64-bit (0x2) +EHDR-NEXT: DataEncoding: LittleEndian (0x1) +EHDR-NEXT: FileVersion: 1 +EHDR-NEXT: OS/ABI: SystemV (0x0) +EHDR-NEXT: ABIVersion: 0 +EHDR-NEXT: Unused: (00 00 00 00 00 00 00) +EHDR-NEXT: } +EHDR-NEXT: Type: Relocatable (0x1) +EHDR-NEXT: Machine: EM_X86_64 (0x3E) +EHDR-NEXT: Version: 1 +EHDR-NEXT: Entry: 0x0 +EHDR-NEXT: ProgramHeaderOffset: 0x40 +EHDR-NEXT: SectionHeaderOffset: +EHDR-NEXT: Flags [ (0x0) +EHDR-NEXT: ] +EHDR-NEXT: HeaderSize: 64 +EHDR-NEXT: ProgramHeaderEntrySize: 56 +EHDR-NEXT: ProgramHeaderCount: 0 +EHDR-NEXT: SectionHeaderEntrySize: 64 +EHDR-NEXT: SectionHeaderCount: 0 +EHDR-NEXT: StringTableSectionIndex: 65535 +EHDR-NEXT: } + +SECS: Index: 65285 +SECS-NEXT: Name: .symtab +SECS-NEXT: Type: SHT_SYMTAB +SECS: Name: .symtab_shndx +SECS-NEXT: Type: SHT_SYMTAB_SHNDX +SECS-NEXT: Flags [ (0x0) +SECS-NEXT: ] +SECS-NEXT: Address: 0x0 +SECS-NEXT: Offset: +# There should be #syms * EntrySize bytes. +SECS-NEXT: Size: 261136 +SECS-NEXT: Link: 65285 +SECS-NEXT: Info: +SECS-NEXT: AddressAlignment: 4 +SECS-NEXT: EntrySize: 4 +SECS: Index: 65287 +SYMS: 65284 Index: llvm/trunk/test/tools/llvm-objcopy/remove-shndx.test =================================================================== --- llvm/trunk/test/tools/llvm-objcopy/remove-shndx.test +++ llvm/trunk/test/tools/llvm-objcopy/remove-shndx.test @@ -0,0 +1,7 @@ +# This test checks to see that a .symtab_shndx section is added to any binary +# that needs it, even if the original was removed. +RUN: python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t +RUN: llvm-objcopy -R .symtab_shndx %t %t2 +RUN: llvm-readobj -sections %t2 | FileCheck %s + +CHECK: Name: .symtab_shndx ( Index: llvm/trunk/test/tools/llvm-objcopy/strict-no-add.test =================================================================== --- llvm/trunk/test/tools/llvm-objcopy/strict-no-add.test +++ llvm/trunk/test/tools/llvm-objcopy/strict-no-add.test @@ -0,0 +1,10 @@ +# This test makes sure that sections added at the end that don't have symbols +# defined in them don't trigger the creation of a large index table. + +RUN: python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t.0 +RUN: cat %p/Inputs/alloc-symtab.o > %t +RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t.0 %t2 +RUN: llvm-objcopy -add-section=.s0=%t -add-section=.s1=%t -add-section=.s2=%t %t2 %t2 +RUN: llvm-readobj -sections %t2 | FileCheck --check-prefix=SECS %s + +SECS-NOT: Name: .symtab_shndx Index: llvm/trunk/tools/llvm-objcopy/Object.h =================================================================== --- llvm/trunk/tools/llvm-objcopy/Object.h +++ llvm/trunk/tools/llvm-objcopy/Object.h @@ -37,6 +37,7 @@ class DynamicRelocationSection; class GnuDebugLinkSection; class GroupSection; +class SectionIndexSection; class Segment; class Object; struct Symbol; @@ -54,10 +55,10 @@ iterator begin() { return iterator(Sections.data()); } iterator end() { return iterator(Sections.data() + Sections.size()); } - SectionBase *getSection(uint16_t Index, Twine ErrMsg); + SectionBase *getSection(uint32_t Index, Twine ErrMsg); template - T *getSectionOfType(uint16_t Index, Twine IndexErrMsg, Twine TypeErrMsg); + T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg); }; enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; @@ -74,6 +75,7 @@ virtual void visit(const DynamicRelocationSection &Sec) = 0; virtual void visit(const GnuDebugLinkSection &Sec) = 0; virtual void visit(const GroupSection &Sec) = 0; + virtual void visit(const SectionIndexSection &Sec) = 0; }; class SectionWriter : public SectionVisitor { @@ -91,6 +93,7 @@ virtual void visit(const RelocationSection &Sec) override = 0; virtual void visit(const GnuDebugLinkSection &Sec) override = 0; virtual void visit(const GroupSection &Sec) override = 0; + virtual void visit(const SectionIndexSection &Sec) override = 0; explicit SectionWriter(Buffer &Buf) : Out(Buf) {} }; @@ -107,6 +110,7 @@ void visit(const RelocationSection &Sec) override; void visit(const GnuDebugLinkSection &Sec) override; void visit(const GroupSection &Sec) override; + void visit(const SectionIndexSection &Sec) override; explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; @@ -123,6 +127,7 @@ void visit(const RelocationSection &Sec) override; void visit(const GnuDebugLinkSection &Sec) override; void visit(const GroupSection &Sec) override; + void visit(const SectionIndexSection &Sec) override; explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; @@ -230,8 +235,9 @@ StringRef Name; Segment *ParentSegment = nullptr; uint64_t HeaderOffset; - uint64_t OriginalOffset; + uint64_t OriginalOffset = std::numeric_limits::max(); uint32_t Index; + bool HasSymbol = false; uint64_t Addr = 0; uint64_t Align = 1; @@ -371,6 +377,7 @@ SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8, + SYMBOL_XINDEX = ELF::SHN_XINDEX, }; struct Symbol { @@ -389,6 +396,32 @@ uint16_t getShndx() const; }; +class SectionIndexSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + +private: + std::vector Indexes; + SymbolTableSection *Symbols = nullptr; + +public: + virtual ~SectionIndexSection() {} + void addIndex(uint32_t Index) { + Indexes.push_back(Index); + Size += 4; + } + void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } + void initialize(SectionTableRef SecTable) override; + void finalize() override; + void accept(SectionVisitor &Visitor) const override; + + SectionIndexSection() { + Name = ".symtab_shndx"; + Align = 4; + EntrySize = 4; + Type = ELF::SHT_SYMTAB_SHNDX; + } +}; + class SymbolTableSection : public SectionBase { MAKE_SEC_WRITER_FRIEND @@ -398,6 +431,7 @@ protected: std::vector> Symbols; StringTableSection *SymbolNames = nullptr; + SectionIndexSection *SectionIndexTable = nullptr; using SymPtr = std::unique_ptr; @@ -405,9 +439,13 @@ void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility, uint16_t Shndx, uint64_t Sz); - void addSymbolNames(); + void prepareForLayout(); // An 'empty' symbol table still contains a null symbol. bool empty() const { return Symbols.size() == 1; } + void setShndxTable(SectionIndexSection *ShndxTable) { + SectionIndexTable = ShndxTable; + } + const SectionIndexSection *getShndxTable() const { return SectionIndexTable; } const SectionBase *getStrTab() const { return SymbolNames; } const Symbol *getSymbolByIndex(uint32_t Index) const; Symbol *getSymbolByIndex(uint32_t Index); @@ -589,6 +627,7 @@ using Elf_Addr = typename ELFT::Addr; using Elf_Shdr = typename ELFT::Shdr; using Elf_Ehdr = typename ELFT::Ehdr; + using Elf_Word = typename ELFT::Word; const ELFFile &ElfFile; Object &Obj; @@ -652,6 +691,7 @@ StringTableSection *SectionNames = nullptr; SymbolTableSection *SymbolTable = nullptr; + SectionIndexSection *SectionIndexTable = nullptr; void sortSections(); SectionTableRef sections() { return SectionTableRef(Sections); } Index: llvm/trunk/tools/llvm-objcopy/Object.cpp =================================================================== --- llvm/trunk/tools/llvm-objcopy/Object.cpp +++ llvm/trunk/tools/llvm-objcopy/Object.cpp @@ -101,6 +101,10 @@ SectionVisitor::~SectionVisitor() {} +void BinarySectionWriter::visit(const SectionIndexSection &Sec) { + error("Cannot write symbol section index table '" + Sec.Name + "' "); +} + void BinarySectionWriter::visit(const SymbolTableSection &Sec) { error("Cannot write symbol table '" + Sec.Name + "' out to binary"); } @@ -154,6 +158,29 @@ Visitor.visit(*this); } +template +void ELFSectionWriter::visit(const SectionIndexSection &Sec) { + uint8_t *Buf = Out.getBufferStart() + Sec.Offset; + auto *IndexesBuffer = reinterpret_cast(Buf); + std::copy(std::begin(Sec.Indexes), std::end(Sec.Indexes), IndexesBuffer); +} + +void SectionIndexSection::initialize(SectionTableRef SecTable) { + Size = 0; + setSymTab(SecTable.getSectionOfType( + Link, + "Link field value " + Twine(Link) + " in section " + Name + " is invalid", + "Link field value " + Twine(Link) + " in section " + Name + + " is not a symbol table")); + Symbols->setShndxTable(this); +} + +void SectionIndexSection::finalize() { Link = Symbols->Index; } + +void SectionIndexSection::accept(SectionVisitor &Visitor) const { + Visitor.visit(*this); +} + static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { switch (Index) { case SHN_ABS: @@ -172,8 +199,13 @@ return false; } +// Large indexes force us to clarify exactly what this function should do. This +// function should return the value that will appear in st_shndx when written +// out. uint16_t Symbol::getShndx() const { if (DefinedIn != nullptr) { + if (DefinedIn->Index >= SHN_LORESERVE) + return SHN_XINDEX; return DefinedIn->Index; } switch (ShndxType) { @@ -187,6 +219,7 @@ case SYMBOL_HEXAGON_SCOMMON_2: case SYMBOL_HEXAGON_SCOMMON_4: case SYMBOL_HEXAGON_SCOMMON_8: + case SYMBOL_XINDEX: return static_cast(ShndxType); } llvm_unreachable("Symbol with invalid ShndxType encountered"); @@ -207,6 +240,8 @@ Sym.Binding = Bind; Sym.Type = Type; Sym.DefinedIn = DefinedIn; + if (DefinedIn != nullptr) + DefinedIn->HasSymbol = true; if (DefinedIn == nullptr) { if (Shndx >= SHN_LORESERVE) Sym.ShndxType = static_cast(Shndx); @@ -222,6 +257,8 @@ } void SymbolTableSection::removeSectionReferences(const SectionBase *Sec) { + if (SectionIndexTable == Sec) + SectionIndexTable = nullptr; if (SymbolNames == Sec) { error("String table " + SymbolNames->Name + " cannot be removed because it is referenced by the symbol table " + @@ -274,7 +311,17 @@ Info = MaxLocalIndex + 1; } -void SymbolTableSection::addSymbolNames() { +void SymbolTableSection::prepareForLayout() { + // Add all potential section indexes before file layout so that the section + // index section has the approprite size. + if (SectionIndexTable != nullptr) { + for (const auto &Sym : Symbols) { + if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE) + SectionIndexTable->addIndex(Sym->DefinedIn->Index); + else + SectionIndexTable->addIndex(SHN_UNDEF); + } + } // Add all of our strings to SymbolNames so that SymbolNames has the right // size before layout is decided. for (auto &Sym : Symbols) @@ -654,12 +701,32 @@ void ELFBuilder::initSymbolTable(SymbolTableSection *SymTab) { const Elf_Shdr &Shdr = *unwrapOrError(ElfFile.getSection(SymTab->Index)); StringRef StrTabData = unwrapOrError(ElfFile.getStringTableForSymtab(Shdr)); + ArrayRef ShndxData; - for (const auto &Sym : unwrapOrError(ElfFile.symbols(&Shdr))) { + auto Symbols = unwrapOrError(ElfFile.symbols(&Shdr)); + for (const auto &Sym : Symbols) { SectionBase *DefSection = nullptr; StringRef Name = unwrapOrError(Sym.getName(StrTabData)); - if (Sym.st_shndx >= SHN_LORESERVE) { + if (Sym.st_shndx == SHN_XINDEX) { + if (SymTab->getShndxTable() == nullptr) + error("Symbol '" + Name + + "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists."); + if (ShndxData.data() == nullptr) { + const Elf_Shdr &ShndxSec = + *unwrapOrError(ElfFile.getSection(SymTab->getShndxTable()->Index)); + ShndxData = unwrapOrError( + ElfFile.template getSectionContentsAsArray(&ShndxSec)); + if (ShndxData.size() != Symbols.size()) + error("Symbol section index table does not have the same number of " + "entries as the symbol table."); + } + Elf_Word Index = ShndxData[&Sym - Symbols.begin()]; + DefSection = Obj.sections().getSection( + Index, + "Symbol '" + Name + "' has invalid section index " + + Twine(Index)); + } else if (Sym.st_shndx >= SHN_LORESERVE) { if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) { error( "Symbol '" + Name + @@ -669,7 +736,7 @@ } else if (Sym.st_shndx != SHN_UNDEF) { DefSection = Obj.sections().getSection( Sym.st_shndx, "Symbol '" + Name + - "' is defined in invalid section with index " + + "' is defined has invalid section index " + Twine(Sym.st_shndx)); } @@ -699,14 +766,14 @@ } } -SectionBase *SectionTableRef::getSection(uint16_t Index, Twine ErrMsg) { +SectionBase *SectionTableRef::getSection(uint32_t Index, Twine ErrMsg) { if (Index == SHN_UNDEF || Index > Sections.size()) error(ErrMsg); return Sections[Index - 1].get(); } template -T *SectionTableRef::getSectionOfType(uint16_t Index, Twine IndexErrMsg, +T *SectionTableRef::getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg) { if (T *Sec = dyn_cast(getSection(Index, IndexErrMsg))) return Sec; @@ -753,6 +820,11 @@ Obj.SymbolTable = &SymTab; return SymTab; } + case SHT_SYMTAB_SHNDX: { + auto &ShndxSection = Obj.addSection(); + Obj.SectionIndexTable = &ShndxSection; + return ShndxSection; + } case SHT_NOBITS: return Obj.addSection
(Data); default: @@ -783,6 +855,12 @@ Sec.Index = Index++; } + // If a section index table exists we'll need to initialize it before we + // initialize the symbol table because the symbol table might need to + // reference it. + if (Obj.SectionIndexTable) + Obj.SectionIndexTable->initialize(Obj.sections()); + // Now that all of the sections have been added we can fill out some extra // details about symbol tables. We need the symbol table filled out before // any relocations. @@ -825,9 +903,13 @@ readSectionHeaders(); readProgramHeaders(); + uint32_t ShstrIndex = Ehdr.e_shstrndx; + if (ShstrIndex == SHN_XINDEX) + ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link; + Obj.SectionNames = Obj.sections().template getSectionOfType( - Ehdr.e_shstrndx, + ShstrIndex, "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) + " in elf header " + " is invalid", "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) + @@ -893,8 +975,27 @@ Ehdr.e_shentsize = sizeof(Elf_Shdr); if (WriteSectionHeaders) { Ehdr.e_shoff = Obj.SHOffset; - Ehdr.e_shnum = size(Obj.sections()) + 1; - Ehdr.e_shstrndx = Obj.SectionNames->Index; + // """ + // If the number of sections is greater than or equal to + // SHN_LORESERVE (0xff00), this member has the value zero and the actual + // number of section header table entries is contained in the sh_size field + // of the section header at index 0. + // """ + auto Shnum = size(Obj.sections()) + 1; + if (Shnum >= SHN_LORESERVE) + Ehdr.e_shnum = 0; + else + Ehdr.e_shnum = Shnum; + // """ + // If the section name string table section index is greater than or equal + // to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX (0xffff) + // and the actual index of the section name string table section is + // contained in the sh_link field of the section header at index 0. + // """ + if (Obj.SectionNames->Index >= SHN_LORESERVE) + Ehdr.e_shstrndx = SHN_XINDEX; + else + Ehdr.e_shstrndx = Obj.SectionNames->Index; } else { Ehdr.e_shoff = 0; Ehdr.e_shnum = 0; @@ -917,8 +1018,17 @@ Shdr.sh_flags = 0; Shdr.sh_addr = 0; Shdr.sh_offset = 0; - Shdr.sh_size = 0; - Shdr.sh_link = 0; + // See writeEhdr for why we do this. + uint64_t Shnum = size(Obj.sections()) + 1; + if (Shnum >= SHN_LORESERVE) + Shdr.sh_size = Shnum; + else + Shdr.sh_size = 0; + // See writeEhdr for why we do this. + if (Obj.SectionNames != nullptr && Obj.SectionNames->Index >= SHN_LORESERVE) + Shdr.sh_link = Obj.SectionNames->Index; + else + Shdr.sh_link = 0; Shdr.sh_info = 0; Shdr.sh_addralign = 0; Shdr.sh_entsize = 0; @@ -946,9 +1056,10 @@ }); if (SymbolTable != nullptr && ToRemove(*SymbolTable)) SymbolTable = nullptr; - if (SectionNames != nullptr && ToRemove(*SectionNames)) { + if (SectionNames != nullptr && ToRemove(*SectionNames)) SectionNames = nullptr; - } + if (SectionIndexTable != nullptr && ToRemove(*SectionIndexTable)) + SectionIndexTable = nullptr; // Now make sure there are no remaining references to the sections that will // be removed. Sometimes it is impossible to remove a reference so we emit // an error here instead. @@ -1109,16 +1220,59 @@ error("Cannot write section header table because section header string " "table was removed."); - // Make sure we add the names of all the sections. + Obj.sortSections(); + + // We need to assign indexes before we perform layout because we need to know + // if we need large indexes or not. We can assign indexes first and check as + // we go to see if we will actully need large indexes. + bool NeedsLargeIndexes = false; + if (size(Obj.sections()) >= SHN_LORESERVE) { + auto Sections = Obj.sections(); + NeedsLargeIndexes = + std::any_of(Sections.begin() + SHN_LORESERVE, Sections.end(), + [](const SectionBase &Sec) { return Sec.HasSymbol; }); + // TODO: handle case where only one section needs the large index table but + // only needs it because the large index table hasn't been removed yet. + } + + if (NeedsLargeIndexes) { + // This means we definitely need to have a section index table but if we + // already have one then we should use it instead of making a new one. + if (Obj.SymbolTable != nullptr && Obj.SectionIndexTable == nullptr) { + // Addition of a section to the end does not invalidate the indexes of + // other sections and assigns the correct index to the new section. + auto &Shndx = Obj.addSection(); + Obj.SymbolTable->setShndxTable(&Shndx); + Shndx.setSymTab(Obj.SymbolTable); + } + } else { + // Since we don't need SectionIndexTable we should remove it and all + // references to it. + if (Obj.SectionIndexTable != nullptr) { + Obj.removeSections([this](const SectionBase &Sec) { + return &Sec == Obj.SectionIndexTable; + }); + } + } + + // Make sure we add the names of all the sections. Importantly this must be + // done after we decide to add or remove SectionIndexes. if (Obj.SectionNames != nullptr) for (const auto &Section : Obj.sections()) { Obj.SectionNames->addString(Section.Name); } - // Make sure we add the names of all the symbols. + + // Before we can prepare for layout the indexes need to be finalized. + uint64_t Index = 0; + for (auto &Sec : Obj.sections()) + Sec.Index = Index++; + + // The symbol table does not update all other sections on update. For + // instance, symbol names are not added as new symbols are added. This means + // that some sections, like .strtab, don't yet have their final size. if (Obj.SymbolTable != nullptr) - Obj.SymbolTable->addSymbolNames(); + Obj.SymbolTable->prepareForLayout(); - Obj.sortSections(); assignOffsets(); // Finalize SectionNames first so that we can assign name indexes.