diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -128,6 +128,8 @@
   Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section,
                                              Elf_Shdr_Range Sections) const;
 
+  Expected<uint64_t> getDynSymtabSize() const;
+
   StringRef getRelocationTypeName(uint32_t Type) const;
   void getRelocationTypeName(uint32_t Type,
                              SmallVectorImpl<char> &Result) const;
@@ -519,6 +521,115 @@
   return getStringTable(Sections[Index], WarnHandler);
 }
 
+/// This function finds the number of dynamic symbols using a GNU hash table.
+///
+/// @param Table The GNU hash table for .dynsym.
+/// @param BufEnd The end of the buffer that contains \p Table; the caller
+///        passes Buf.bytes_end(). Chain entries are only read while they lie
+///        strictly below this address.
+/// @return The number of dynamic symbols, or a parse_failed error when the
+///         last chain has no terminating entry before \p BufEnd.
+template <class ELFT>
+static Expected<uint64_t>
+getDynSymtabSizeFromGnuHash(const typename ELFT::GnuHash &Table,
+                            const void *BufEnd) {
+  using Elf_Word = typename ELFT::Word;
+  if (Table.nbuckets == 0)
+    return Table.symndx + 1;
+  uint64_t LastSymIdx = 0;
+  // Find the index of the first symbol in the last chain.
+  for (Elf_Word Val : Table.buckets())
+    LastSymIdx = std::max(LastSymIdx, static_cast<uint64_t>(Val));
+  const Elf_Word *It =
+      reinterpret_cast<const Elf_Word *>(Table.values(LastSymIdx).end());
+  // Locate the end of the chain to find the last symbol index. An entry with
+  // bit 0 set ends the walk. It is compared with `<` so that the word at
+  // BufEnd itself (outside the buffer) is never dereferenced.
+  while (It < BufEnd && (*It & 1) == 0) {
+    ++LastSymIdx;
+    ++It;
+  }
+  if (It >= BufEnd) {
+    return createStringError(
+        object_error::parse_failed,
+        "no terminator found for GNU hash section before buffer end");
+  }
+  return LastSymIdx + 1;
+}
+
+/// This function determines the number of dynamic symbols. It reads section
+/// headers first. If section headers are not available, the number of
+/// symbols will be inferred by parsing dynamic hash tables.
+///
+/// @return The number of .dynsym entries; 0 when section headers exist but
+///         none is SHT_DYNSYM, or when the dynamic table has neither
+///         DT_GNU_HASH nor DT_HASH; otherwise an error.
+template <class ELFT>
+Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
+  // Read .dynsym section header first if available.
+  Expected<Elf_Shdr_Range> SectionsOrError = sections();
+  if (!SectionsOrError)
+    return SectionsOrError.takeError();
+  for (const Elf_Shdr &Sec : *SectionsOrError) {
+    if (Sec.sh_type == ELF::SHT_DYNSYM) {
+      // sh_entsize is the divisor below; reject 0 instead of dividing by it.
+      if (Sec.sh_entsize == 0)
+        return createStringError(object_error::parse_failed,
+                                 "SHT_DYNSYM section has sh_entsize of 0");
+      if (Sec.sh_size % Sec.sh_entsize != 0) {
+        std::string ErrMsg;
+        raw_string_ostream Out(ErrMsg);
+        Out << "SHT_DYNSYM section has sh_size (" << Sec.sh_size
+            << ") % sh_entsize (" << Sec.sh_entsize << ") that is not 0";
+        return createStringError(object_error::parse_failed, Out.str());
+      }
+      return Sec.sh_size / Sec.sh_entsize;
+    }
+  }
+
+  if (!SectionsOrError->empty()) {
+    // Section headers are available but .dynsym header is not found.
+    // Return 0 as .dynsym does not exist.
+    return 0;
+  }
+
+  // Section headers do not exist. Falling back to infer
+  // upper bound of .dynsym from .gnu.hash and .hash.
+  Expected<Elf_Dyn_Range> DynTable = dynamicEntries();
+  if (!DynTable)
+    return DynTable.takeError();
+  llvm::Optional<uint64_t> ElfHash;
+  llvm::Optional<uint64_t> ElfGnuHash;
+  for (const Elf_Dyn &Entry : *DynTable) {
+    switch (Entry.d_tag) {
+    case ELF::DT_HASH:
+      ElfHash = Entry.d_un.d_ptr;
+      break;
+    case ELF::DT_GNU_HASH:
+      ElfGnuHash = Entry.d_un.d_ptr;
+      break;
+    }
+  }
+  if (ElfGnuHash) {
+    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfGnuHash);
+    if (!TablePtr)
+      return TablePtr.takeError();
+    const Elf_GnuHash *Table =
+        reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
+    return getDynSymtabSizeFromGnuHash<ELFT>(*Table, this->Buf.bytes_end());
+  }
+
+  // Search SYSV hash table to try to find the upper bound of dynsym.
+  if (ElfHash) {
+    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfHash);
+    if (!TablePtr)
+      return TablePtr.takeError();
+    const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
+    return Table->nchain;
+  }
+  return 0;
+}
+
 template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
 
 template <class ELFT>
diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
--- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp
+++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -440,62 +440,6 @@
   return Error::success();
 }
 
-/// This function finds the number of dynamic symbols using a GNU hash table.
-///
-/// @param Table The GNU hash table for .dynsym.
-template <class ELFT>
-static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
-  using Elf_Word = typename ELFT::Word;
-  if (Table.nbuckets == 0)
-    return Table.symndx + 1;
-  uint64_t LastSymIdx = 0;
-  uint64_t BucketVal = 0;
-  // Find the index of the first symbol in the last chain.
-  for (Elf_Word Val : Table.buckets()) {
-    BucketVal = std::max(BucketVal, (uint64_t)Val);
-  }
-  LastSymIdx += BucketVal;
-  const Elf_Word *It =
-      reinterpret_cast<const Elf_Word *>(Table.values(BucketVal).end());
-  // Locate the end of the chain to find the last symbol index.
-  while ((*It & 1) == 0) {
-    LastSymIdx++;
-    It++;
-  }
-  return LastSymIdx + 1;
-}
-
-/// This function determines the number of dynamic symbols.
-/// Without access to section headers, the number of symbols must be determined
-/// by parsing dynamic hash tables.
-///
-/// @param Dyn Entries with the locations of hash tables.
-/// @param ElfFile The ElfFile that the section contents reside in.
-template <class ELFT>
-static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
-                                     const ELFFile<ELFT> &ElfFile) {
-  using Elf_Hash = typename ELFT::Hash;
-  using Elf_GnuHash = typename ELFT::GnuHash;
-  // Search GNU hash table to try to find the upper bound of dynsym.
-  if (Dyn.GnuHash.hasValue()) {
-    Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.GnuHash);
-    if (!TablePtr)
-      return TablePtr.takeError();
-    const Elf_GnuHash *Table =
-        reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
-    return getDynSymtabSize<ELFT>(*Table);
-  }
-  // Search SYSV hash table to try to find the upper bound of dynsym.
-  if (Dyn.ElfHash.hasValue()) {
-    Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.ElfHash);
-    if (!TablePtr)
-      return TablePtr.takeError();
-    const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
-    return Table->nchain;
-  }
-  return 0;
-}
-
 /// This function extracts symbol type from a symbol's st_info member and
 /// maps it to an ELFSymbolType enum.
 /// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported.
@@ -637,7 +581,7 @@
   }
 
   // Populate Symbols from .dynsym table and dynamic string table.
-  Expected<uint64_t> SymCount = getNumSyms(DynEnt, ElfFile);
+  Expected<uint64_t> SymCount = ElfFile.getDynSymtabSize();
   if (!SymCount)
     return SymCount.takeError();
   if (*SymCount > 0) {
diff --git a/llvm/test/tools/llvm-elfabi/read-elf-dynsym.test b/llvm/test/tools/llvm-elfabi/read-elf-dynsym.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-elfabi/read-elf-dynsym.test
@@ -0,0 +1,122 @@
+## Test reading ELF with .dynsym under the following conditions:
+##  * Section headers are available.
+##  * Section headers are stripped but there is a DT_GNU_HASH dynamic tag.
+##  * Section headers are stripped but there is a DT_HASH dynamic tag.
+
+## Test if llvm-elfabi reads DT_SYMTAB size through section headers by putting the wrong terminator in DT_GNU_HASH.
+# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00"
+# RUN: llvm-elfabi --elf %tfull --emit-tbe=- | FileCheck %s
+
+## Test if llvm-elfabi fails to read DT_SYMTAB size through section headers when the value of sh_entsize is invalid.
+# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DENTSIZE="0x19"
+# RUN: not llvm-elfabi --elf %tfull --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=BADENTSIZE
+
+## Test if llvm-elfabi reads DT_SYMTAB size through DT_GNU_HASH.
+# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00"
+# RUN: llvm-strip --strip-sections %tw.gnu.hash
+# RUN: llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- | FileCheck %s
+
+## Test if llvm-elfabi fails to read DT_SYMTAB size through DT_GNU_HASH when there is no terminator.
+# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0xA]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00"
+# RUN: llvm-strip --strip-sections %tw.gnu.hash
+# RUN: not llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=NOTERMINATOR
+
+## Test if llvm-elfabi reads DT_SYMTAB size through DT_HASH.
+# RUN: yaml2obj %s -o %tw.hash -DGNUHASHVALUE="[0x9]" -DTAG1="DT_HASH" -DVAL1="0xA00"
+# RUN: llvm-strip --strip-sections %tw.hash
+# RUN: llvm-elfabi --elf %tw.hash --emit-tbe=- | FileCheck %s
+
+# CHECK:      --- !tapi-tbe
+# CHECK-NEXT: TbeVersion: 1.0
+# CHECK-NEXT: Arch: AArch64
+# CHECK-NEXT: Symbols:
+# CHECK-NEXT:   bar: { Type: Object, Size: 0 }
+# CHECK-NEXT:   foo: { Type: Func }
+# CHECK-NEXT: ...
+
+# BADENTSIZE: SHT_DYNSYM section has sh_size (72) % sh_entsize (25) that is not 0
+
+# NOTERMINATOR: error: no terminator found for GNU hash section before buffer end
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_DYN
+  Machine: EM_AARCH64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+  - Name: .data
+    Type: SHT_PROGBITS
+  - Name: .strtab
+    Type: SHT_STRTAB
+  - Name: .shstrtab
+    Type: SHT_STRTAB
+  - Name: .dynsym
+    Type: SHT_DYNSYM
+    Flags: [ SHF_ALLOC ]
+    EntSize: [[ENTSIZE=0x18]]
+    Address: 0x400
+    AddressAlign: 0x400
+  - Name: .dynstr
+    Type: SHT_STRTAB
+    Flags: [ SHF_ALLOC ]
+    Address: 0x600
+    AddressAlign: 0x200
+  - Name: .dynamic
+    Type: SHT_DYNAMIC
+    Flags: [ SHF_ALLOC ]
+    Address: 0x800
+    AddressAlign: 0x200
+    Entries:
+      - Tag: DT_STRTAB
+        Value: 0x600
+      - Tag: DT_STRSZ
+        Value: 9
+      - Tag: DT_SYMTAB
+        Value: 0x400
+      - Tag: [[TAG1]]
+        Value: [[VAL1]]
+      - Tag: DT_NULL
+        Value: 0
+  - Name: .hash
+    Type: SHT_HASH
+    Flags: [ SHF_ALLOC ]
+    Address: 0xA00
+    AddressAlign: 0x200
+    Bucket: [ 1 ]
+    Chain: [ 1, 2, 3 ]
+  - Name: .gnu.hash
+    Type: SHT_GNU_HASH
+    Flags: [ SHF_ALLOC ]
+    Address: 0xC00
+    AddressAlign: 0x200
+    Header:
+      SymNdx: 0x1
+      Shift2: 0x2
+      MaskWords: 2
+      NBuckets: 2
+    BloomFilter: [0x3, 0x4]
+    HashBuckets: [0x0, 0x1]
+    HashValues: [[GNUHASHVALUE]]
+DynamicSymbols:
+  - Name: foo
+    Type: STT_FUNC
+    Section: .strtab
+    Value: 0x100
+    Binding: 1
+  - Name: bar
+    Type: STT_OBJECT
+    Section: .strtab
+    Value: 0x200
+    Binding: 1
+ProgramHeaders:
+  - Type: PT_LOAD
+    VAddr: 0x400
+    FirstSec: .dynsym
+    LastSec: .gnu.hash
+  - Type: PT_DYNAMIC
+    VAddr: 0x800
+    FirstSec: .dynamic
+    LastSec: .dynamic