Index: llvm/test/tools/llvm-elfabi/binary-read-syms-gnu-hash.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-elfabi/binary-read-syms-gnu-hash.test @@ -0,0 +1,22 @@ +# RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=- | FileCheck %s + +# CHECK: --- !tapi-tbe +# CHECK-NEXT: TbeVersion: 1.0 +# CHECK-NEXT: SoName: libsomething.so +# CHECK-NEXT: Arch: x86_64 +# CHECK-NEXT: NeededLibs: +# CHECK-NEXT: - libm.so.6 +# CHECK-NEXT: - libc.so.6 +# CHECK-NEXT: - ld-linux-x86-64.so.2 +# CHECK-NEXT: Symbols: +# CHECK-NEXT: AGlobalInteger: { Type: Object, Size: 4 } +# CHECK-NEXT: AThreadLocalLongInteger: { Type: TLS, Size: 8 } +# CHECK-NEXT: _ITM_deregisterTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _ITM_registerTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _Z11rotateArrayPii: { Type: Func } +# CHECK-NEXT: __cxa_finalize: { Type: Func, Undefined: true, Weak: true } +# CHECK-NEXT: __gmon_start__: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: __tls_get_addr: { Type: Func, Undefined: true } +# CHECK-NEXT: _fini: { Type: Func } +# CHECK-NEXT: _init: { Type: Func } +# CHECK-NEXT: ... Index: llvm/test/tools/llvm-elfabi/binary-read-syms-sysv-hash.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-elfabi/binary-read-syms-sysv-hash.test @@ -0,0 +1,22 @@ +# RUN: llvm-elfabi --elf %p/Inputs/sysv_hash.so --emit-tbe=- | FileCheck %s + +# CHECK: --- !tapi-tbe +# CHECK-NEXT: TbeVersion: 1.0 +# CHECK-NEXT: SoName: libsomething.so +# CHECK-NEXT: Arch: x86_64 +# CHECK-NEXT: NeededLibs: +# CHECK-NEXT: - libm.so.6 +# CHECK-NEXT: - libc.so.6 +# CHECK-NEXT: - ld-linux-x86-64.so.2 +# CHECK-NEXT: Symbols: +# CHECK-NEXT: AGlobalInteger: { Type: Object, Size: 4 } +# CHECK-NEXT: AThreadLocalLongInteger: { Type: TLS, Size: 8 } +# CHECK-NEXT: _ITM_deregisterTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _ITM_registerTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _Z11rotateArrayPii: { Type: Func } +# CHECK-NEXT: __cxa_finalize: { Type: Func, Undefined: true, Weak: true } +# CHECK-NEXT: __gmon_start__: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: __tls_get_addr: { Type: Func, Undefined: true } +# CHECK-NEXT: _fini: { Type: Func } +# CHECK-NEXT: _init: { Type: Func } +# CHECK-NEXT: ... Index: llvm/tools/llvm-elfabi/ELFObjHandler.cpp =================================================================== --- llvm/tools/llvm-elfabi/ELFObjHandler.cpp +++ llvm/tools/llvm-elfabi/ELFObjHandler.cpp @@ -31,6 +31,11 @@ uint64_t StrSize = 0; Optional SONameOffset; std::vector NeededLibNames; + // Symbol table: + uint64_t DynSymAddr = 0; + // Hash tables: + Optional ElfHash; + Optional GnuHash; }; /// This function behaves similarly to StringRef::substr(), but attempts to @@ -81,6 +86,7 @@ // Search .dynamic for relevant entries. bool FoundDynStr = false; bool FoundDynStrSz = false; + bool FoundDynSym = false; for (auto &Entry : DynTable) { switch (Entry.d_tag) { case DT_SONAME: @@ -97,6 +103,15 @@ case DT_NEEDED: Dyn.NeededLibNames.push_back(Entry.d_un.d_val); break; + case DT_SYMTAB: + Dyn.DynSymAddr = Entry.d_un.d_ptr; + FoundDynSym = true; + break; + case DT_HASH: + Dyn.ElfHash = Entry.d_un.d_ptr; + break; + case DT_GNU_HASH: + Dyn.GnuHash = Entry.d_un.d_ptr; } } @@ -108,6 +123,10 @@ return createError( "Couldn't determine dynamic string table size (no DT_STRSZ entry)"); } + if (!FoundDynSym) { + return createError( + "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)"); + } if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) { return createStringError( object_error::parse_failed, @@ -126,6 +145,139 @@ return Error::success(); } +/// This function finds the number of dynamic symbols using a GNU hash table. +/// +/// @param Table The GNU hash table for .dynsym. +template +static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) { + using Elf_Word = typename ELFT::Word; + if (Table.nbuckets == 0) + return Table.symndx + 1; + uint64_t LastSymIdx = 0; + uint64_t BucketVal = 0; + // Find the index of the first symbol in the last chain. + for (Elf_Word Val : Table.buckets()) { + BucketVal = std::max(BucketVal, (uint64_t)Val); + } + LastSymIdx += BucketVal; + const Elf_Word *It = + reinterpret_cast(Table.values(BucketVal).end()); + // Locate the end of the chain to find the last symbol index. + while ((*It & 1) == 0) { + LastSymIdx++; + It++; + } + return LastSymIdx + 1; +} + +/// This function determines the number of dynamic symbols. +/// Without access to section headers, the number of symbols must be determined +/// by parsing dynamic hash tables. +/// +/// @param Dyn Entries with the locations of hash tables. +/// @param ElfFile The ElfFile that the section contents reside in. +template +static Expected getNumSyms(DynamicEntries &Dyn, + const ELFFile &ElfFile) { + using Elf_Hash = typename ELFT::Hash; + using Elf_GnuHash = typename ELFT::GnuHash; + // Search GNU hash table to try to find the upper bound of dynsym. + if (Dyn.GnuHash.hasValue()) { + Expected TablePtr = ElfFile.toMappedAddr(*Dyn.GnuHash); + if (!TablePtr) + return TablePtr.takeError(); + const Elf_GnuHash *Table = + reinterpret_cast(TablePtr.get()); + return getDynSymtabSize(*Table); + } + // Search SYSV hash table to try to find the upper bound of dynsym. + if (Dyn.ElfHash.hasValue()) { + Expected TablePtr = ElfFile.toMappedAddr(*Dyn.ElfHash); + if (!TablePtr) + return TablePtr.takeError(); + const Elf_Hash *Table = reinterpret_cast(TablePtr.get()); + return Table->nchain; + } + return 0; +} + +/// This function extracts symbol type from a symbol's st_info member and +/// maps it to an ELFSymbolType enum. +/// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported. +/// Other symbol types are mapped to ELFSymbolType::Unknown. +/// +/// @param Info Binary symbol st_info to extract symbol type from. +static ELFSymbolType convertInfoToType(uint8_t Info) { + Info = Info & 0xf; + switch (Info) { + case ELF::STT_NOTYPE: + return ELFSymbolType::NoType; + case ELF::STT_OBJECT: + return ELFSymbolType::Object; + case ELF::STT_FUNC: + return ELFSymbolType::Func; + case ELF::STT_TLS: + return ELFSymbolType::TLS; + default: + return ELFSymbolType::Unknown; + } +} + +/// This function populates all members of an ELFSymbol from a binary ELFT::Sym. +/// +/// @param TargetSym ELFSymbol to populate. +/// @param RawSym ELFT::Sym to extract symbol information from. +template +static void populateSymbol(ELFSymbol &TargetSym, + const typename ELFT::Sym &RawSym) { + uint8_t Binding = RawSym.getBinding(); + if (Binding == STB_WEAK) + TargetSym.Weak = true; + else + TargetSym.Weak = false; + + TargetSym.Undefined = RawSym.isUndefined(); + TargetSym.Type = convertInfoToType(RawSym.st_info); + + if (TargetSym.Type == ELFSymbolType::Func) { + TargetSym.Size = 0; + } else { + TargetSym.Size = RawSym.st_size; + } +} + +/// This function populates an ELFStub with symbols using information read +/// from an ELF binary. +/// +/// @param TargetStub ELFStub to add symbols to. +/// @param DynSym Range of dynamic symbols to add to TargetStub. +/// @param DynStr StringRef to the dynamic string table. +template +static Error populateSymbols(ELFStub &TargetStub, + const typename ELFT::SymRange DynSym, + StringRef DynStr) { + // Skips the first symbol since it's the NULL symbol. + for (auto RawSym : DynSym.drop_front(1)) { + // If a symbol does not have global or weak binding, ignore it. + uint8_t Binding = RawSym.getBinding(); + if (!(Binding == STB_GLOBAL || Binding == STB_WEAK)) + continue; + // If a symbol doesn't have default or protected visibility, ignore it. + uint8_t Visibility = RawSym.getVisibility(); + if (!(Visibility == STV_DEFAULT || Visibility == STV_PROTECTED)) + continue; + // Create an ELFSymbol and populate it with information from the symbol table entry. + Expected SymName = terminatedSubstr(DynStr, RawSym.st_name); + if (!SymName) + return SymName.takeError(); + ELFSymbol Sym(*SymName); + populateSymbol(Sym, RawSym); + TargetStub.Symbols.insert(std::move(Sym)); + // TODO: Populate symbol warning. + } + return Error::success(); +} + /// Returns a new ELFStub with all members populated from an ELFObjectFile. /// @param ElfObj Source ELFObjectFile. template @@ -133,6 +285,8 @@ buildStub(const ELFObjectFile &ElfObj) { using Elf_Dyn_Range = typename ELFT::DynRange; using Elf_Phdr_Range = typename ELFT::PhdrRange; + using Elf_Sym_Range = typename ELFT::SymRange; + using Elf_Sym = typename ELFT::Sym; std::unique_ptr DestStub = make_unique(); const ELFFile *ElfFile = ElfObj.getELFFile(); // Fetch .dynamic table. @@ -152,7 +306,7 @@ if (Error Err = populateDynamic(DynEnt, *DynTable)) return std::move(Err); - // Convert .dynstr address to an offset. + // Get pointer to in-memory location of .dynstr section. Expected DynStrPtr = ElfFile->toMappedAddr(DynEnt.StrTabAddr); if (!DynStrPtr) @@ -185,7 +339,25 @@ DestStub->NeededLibs.push_back(*LibNameOrErr); } - // TODO: Populate Symbols from .dynsym table and linked string table. + // Populate Symbols from .dynsym table and dynamic string table. + Expected SymCount = getNumSyms(DynEnt, *ElfFile); + if (!SymCount) + return SymCount.takeError(); + if (*SymCount > 0) { + // Get pointer to in-memory location of .dynsym section. + Expected DynSymPtr = + ElfFile->toMappedAddr(DynEnt.DynSymAddr); + if (!DynSymPtr) + return appendToError(DynSymPtr.takeError(), + "when locating .dynsym section contents"); + Elf_Sym_Range DynSyms = + ArrayRef(reinterpret_cast(*DynSymPtr), + *SymCount); + Error SymReadError = populateSymbols(*DestStub, DynSyms, DynStr); + if (SymReadError) + return appendToError(std::move(SymReadError), + "when reading dynamic symbols"); + } return std::move(DestStub); }