Index: llvm/test/tools/llvm-elfabi/binary-read-add-soname.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-add-soname.test +++ llvm/test/tools/llvm-elfabi/binary-read-add-soname.test @@ -18,11 +18,12 @@ Flags: [ SHF_ALLOC ] Address: 0x0008 AddressAlign: 8 - Content: "0a0000000000000001000000000000000500000000000000000000000000000000000000000000000000000000000000" + Content: "0a000000000000000100000000000000050000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000" # DT_STRSZ 1 (0x1) # DT_STRTAB 0x0 + # DT_SYMTAB 0x0 # DT_NULL 0x0 - Size: 48 + Size: 64 Link: .dynstr ProgramHeaders: - Type: PT_LOAD Index: llvm/test/tools/llvm-elfabi/binary-read-arch.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-arch.test +++ llvm/test/tools/llvm-elfabi/binary-read-arch.test @@ -18,11 +18,12 @@ Flags: [ SHF_ALLOC ] Address: 0x0008 AddressAlign: 8 - Content: "0a0000000000000001000000000000000500000000000000000000000000000000000000000000000000000000000000" + Content: "0a000000000000000100000000000000050000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000" # DT_STRSZ 1 (0x1) # DT_STRTAB 0x0 + # DT_SYMTAB 0x0 # DT_NULL 0x0 - Size: 48 + Size: 64 Link: .dynstr ProgramHeaders: - Type: PT_LOAD Index: llvm/test/tools/llvm-elfabi/binary-read-bad-soname.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-bad-soname.test +++ llvm/test/tools/llvm-elfabi/binary-read-bad-soname.test @@ -18,12 +18,13 @@ Flags: [ SHF_ALLOC ] Address: 0x0008 AddressAlign: 8 - Content: "0e000000000000000d000000000000000a0000000000000001000000000000000500000000000000000000000000000000000000000000000000000000000000" + Content: 
"0e000000000000000d000000000000000a000000000000000100000000000000050000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000" # DT_SONAME 13 (0x0d) # DT_STRSZ 1 (0x01) # DT_STRTAB 0x0 + # DT_SYMTAB 0x0 # DT_NULL 0x0 - Size: 64 + Size: 80 Link: .dynstr ProgramHeaders: - Type: PT_LOAD Index: llvm/test/tools/llvm-elfabi/binary-read-bad-vaddr.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-bad-vaddr.test +++ llvm/test/tools/llvm-elfabi/binary-read-bad-vaddr.test @@ -18,12 +18,13 @@ Flags: [ SHF_ALLOC ] Address: 0x1008 AddressAlign: 8 - Content: "0e0000000000000000000000000000000a0000000000000001000000000000000500000000000000600200000000000000000000000000000000000000000000" + Content: "0e0000000000000000000000000000000a000000000000000100000000000000050000000000000060020000000000000600000000000000001000000000000000000000000000000000000000000000" # DT_SONAME 0 # DT_STRSZ 1 # DT_STRTAB 0x0260 # Bad vaddr (no PT_LOAD for 0x0000 to 0x0FFF) + # DT_SYMTAB 0x1000 # DT_NULL 0x0 - Size: 64 + Size: 80 Link: .dynstr ProgramHeaders: - Type: PT_LOAD Index: llvm/test/tools/llvm-elfabi/binary-read-neededlibs-bad-offset.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-neededlibs-bad-offset.test +++ llvm/test/tools/llvm-elfabi/binary-read-neededlibs-bad-offset.test @@ -18,15 +18,16 @@ Type: SHT_DYNAMIC Flags: [ SHF_ALLOC ] Address: 0x1024 - Content: "010000000000000001000000000000000e0000000000000015000000000000000100000000000000ffff0000000000000a0000000000000024000000000000000500000000000000001000000000000000000000000000000000000000000000" + Content: "010000000000000001000000000000000e0000000000000015000000000000000100000000000000ffff0000000000000a000000000000002400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000" # DT_NEEDED 1 (0x01) # DT_SONAME 21 (0x15) 
# Bad DT_NEEDED entry (offset outside string table): # DT_NEEDED 65535 (0xffff) # DT_STRSZ 36 (0x24) # DT_STRTAB 0x1000 + # DT_SYMTAB 0x1000 # DT_NULL 0x0 - Size: 96 + Size: 112 ProgramHeaders: - Type: PT_LOAD Flags: [ PF_R ] Index: llvm/test/tools/llvm-elfabi/binary-read-neededlibs.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-neededlibs.test +++ llvm/test/tools/llvm-elfabi/binary-read-neededlibs.test @@ -18,14 +18,15 @@ Type: SHT_DYNAMIC Flags: [ SHF_ALLOC ] Address: 0x1024 - Content: "010000000000000001000000000000000e00000000000000150000000000000001000000000000000b000000000000000a0000000000000024000000000000000500000000000000001000000000000000000000000000000000000000000000" + Content: "010000000000000001000000000000000e00000000000000150000000000000001000000000000000b000000000000000a000000000000002400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000" # DT_NEEDED 1 (0x01) # DT_SONAME 21 (0x15) # DT_NEEDED 11 (0x0b) # DT_STRSZ 36 (0x24) # DT_STRTAB 0x1000 + # DT_SYMTAB 0x1000 # DT_NULL 0x0 - Size: 96 + Size: 112 ProgramHeaders: - Type: PT_LOAD Flags: [ PF_R ] Index: llvm/test/tools/llvm-elfabi/binary-read-replace-soname.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-replace-soname.test +++ llvm/test/tools/llvm-elfabi/binary-read-replace-soname.test @@ -20,12 +20,13 @@ Flags: [ SHF_ALLOC ] Address: 0x1018 AddressAlign: 8 - Content: "0e0000000000000005000000000000000a0000000000000014000000000000000500000000000000001000000000000000000000000000000000000000000000" + Content: "0e0000000000000005000000000000000a000000000000001400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000" # DT_SONAME 5 (0x05) # DT_STRSZ 20 (0x14) # DT_STRTAB 0x1000 + # DT_SYMTAB 0x1000 # DT_NULL 0x0 - Size: 64 + Size: 80 Link: .dynstr 
ProgramHeaders: - Type: PT_LOAD Index: llvm/test/tools/llvm-elfabi/binary-read-soname-no-null.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-soname-no-null.test +++ llvm/test/tools/llvm-elfabi/binary-read-soname-no-null.test @@ -19,12 +19,13 @@ Flags: [ SHF_ALLOC ] Address: 0x1018 AddressAlign: 8 - Content: "0e0000000000000005000000000000000a000000000000000f000000000000000500000000000000001000000000000000000000000000000000000000000000" + Content: "0e0000000000000005000000000000000a000000000000000f00000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000" # DT_SONAME 5 (0x05) # DT_STRSZ 15 (0x0F) # DT_STRTAB 0x1000 + # DT_SYMTAB 0x1000 # DT_NULL 0x0 - Size: 64 + Size: 80 Link: .dynstr ProgramHeaders: - Type: PT_LOAD Index: llvm/test/tools/llvm-elfabi/binary-read-soname.test =================================================================== --- llvm/test/tools/llvm-elfabi/binary-read-soname.test +++ llvm/test/tools/llvm-elfabi/binary-read-soname.test @@ -19,12 +19,13 @@ Flags: [ SHF_ALLOC ] Address: 0x1018 AddressAlign: 8 - Content: "0e0000000000000005000000000000000a0000000000000014000000000000000500000000000000001000000000000000000000000000000000000000000000" + Content: "0e0000000000000005000000000000000a000000000000001400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000" # DT_SONAME 5 (0x05) # DT_STRSZ 20 (0x14) # DT_STRTAB 0x1000 + # DT_SYMTAB 0x1000 # DT_NULL 0x0 - Size: 64 + Size: 80 Link: .dynstr ProgramHeaders: - Type: PT_LOAD Index: llvm/test/tools/llvm-elfabi/binary-read-syms-gnu-hash.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-elfabi/binary-read-syms-gnu-hash.test @@ -0,0 +1,22 @@ +# RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=- | FileCheck %s + +# CHECK: --- !tapi-tbe +# CHECK-NEXT: TbeVersion: 
1.0 +# CHECK-NEXT: SoName: libsomething.so +# CHECK-NEXT: Arch: x86_64 +# CHECK-NEXT: NeededLibs: +# CHECK-NEXT: - libm.so.6 +# CHECK-NEXT: - libc.so.6 +# CHECK-NEXT: - ld-linux-x86-64.so.2 +# CHECK-NEXT: Symbols: +# CHECK-NEXT: AGlobalInteger: { Type: Object, Size: 4 } +# CHECK-NEXT: AThreadLocalLongInteger: { Type: TLS, Size: 8 } +# CHECK-NEXT: _ITM_deregisterTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _ITM_registerTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _Z11rotateArrayPii: { Type: Func } +# CHECK-NEXT: __cxa_finalize: { Type: Func, Undefined: true, Weak: true } +# CHECK-NEXT: __gmon_start__: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: __tls_get_addr: { Type: Func, Undefined: true } +# CHECK-NEXT: _fini: { Type: Func } +# CHECK-NEXT: _init: { Type: Func } +# CHECK-NEXT: ... Index: llvm/test/tools/llvm-elfabi/binary-read-syms-sysv-hash.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-elfabi/binary-read-syms-sysv-hash.test @@ -0,0 +1,22 @@ +# RUN: llvm-elfabi --elf %p/Inputs/sysv_hash.so --emit-tbe=- | FileCheck %s + +# CHECK: --- !tapi-tbe +# CHECK-NEXT: TbeVersion: 1.0 +# CHECK-NEXT: SoName: libsomething.so +# CHECK-NEXT: Arch: x86_64 +# CHECK-NEXT: NeededLibs: +# CHECK-NEXT: - libm.so.6 +# CHECK-NEXT: - libc.so.6 +# CHECK-NEXT: - ld-linux-x86-64.so.2 +# CHECK-NEXT: Symbols: +# CHECK-NEXT: AGlobalInteger: { Type: Object, Size: 4 } +# CHECK-NEXT: AThreadLocalLongInteger: { Type: TLS, Size: 8 } +# CHECK-NEXT: _ITM_deregisterTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _ITM_registerTMCloneTable: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: _Z11rotateArrayPii: { Type: Func } +# CHECK-NEXT: __cxa_finalize: { Type: Func, Undefined: true, Weak: true } +# CHECK-NEXT: __gmon_start__: { Type: NoType, Undefined: true, Weak: true } +# CHECK-NEXT: __tls_get_addr: { Type: Func, Undefined: 
true }
+# CHECK-NEXT: _fini: { Type: Func }
+# CHECK-NEXT: _init: { Type: Func }
+# CHECK-NEXT: ...
Index: llvm/tools/llvm-elfabi/ELFObjHandler.cpp
===================================================================
--- llvm/tools/llvm-elfabi/ELFObjHandler.cpp
+++ llvm/tools/llvm-elfabi/ELFObjHandler.cpp
@@ -31,6 +31,11 @@
   uint64_t StrSize = 0;
   Optional<uint64_t> SONameOffset;
   std::vector<uint64_t> NeededLibNames;
+  // Symbol table:
+  uint64_t DynSymAddr = 0;
+  // Hash tables:
+  Optional<uint64_t> ElfHash;
+  Optional<uint64_t> GnuHash;
 };
 
 /// This function behaves similarly to StringRef::substr(), but attempts to
@@ -81,6 +86,7 @@
   // Search .dynamic for relevant entries.
   bool FoundDynStr = false;
   bool FoundDynStrSz = false;
+  bool FoundDynSym = false;
   for (auto &Entry : DynTable) {
     switch (Entry.d_tag) {
     case DT_SONAME:
@@ -97,6 +103,15 @@
     case DT_NEEDED:
       Dyn.NeededLibNames.push_back(Entry.d_un.d_val);
       break;
+    case DT_SYMTAB:
+      Dyn.DynSymAddr = Entry.d_un.d_ptr;
+      FoundDynSym = true;
+      break;
+    case DT_HASH:
+      Dyn.ElfHash = Entry.d_un.d_ptr;
+      break;
+    case DT_GNU_HASH:
+      Dyn.GnuHash = Entry.d_un.d_ptr;
     }
   }
 
@@ -108,6 +123,10 @@
     return createError(
         "Couldn't determine dynamic string table size (no DT_STRSZ entry)");
   }
+  if (!FoundDynSym) {
+    return createError(
+        "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)");
+  }
   if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) {
     return createStringError(
         object_error::parse_failed,
@@ -126,6 +145,142 @@
   return Error::success();
 }
 
+/// This function finds the number of dynamic symbols using a GNU hash table.
+///
+/// @param Table The GNU hash table for .dynsym.
+template <class ELFT>
+static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
+  using Elf_Word = typename ELFT::Word;
+  if (Table.nbuckets == 0)
+    return Table.symndx + 1;
+  uint64_t LastSymIdx = 0;
+  uint64_t BucketVal = 0;
+  // Find the index of the first symbol in the last chain.
+  for (Elf_Word Val : Table.buckets()) {
+    BucketVal = std::max(BucketVal, (uint64_t)Val);
+  }
+  LastSymIdx += BucketVal;
+  const Elf_Word *It =
+      reinterpret_cast<const Elf_Word *>(Table.values(BucketVal).end());
+  // Locate the end of the chain to find the last symbol index.
+  while ((*It & 1) == 0) {
+    LastSymIdx++;
+    It++;
+  }
+  return LastSymIdx + 1;
+}
+
+/// This function determines the number of dynamic symbols.
+/// Without access to section headers, the number of symbols must be determined
+/// by parsing dynamic hash tables.
+///
+/// @param Dyn Entries with the locations of hash tables.
+/// @param ElfFile The ElfFile that the section contents reside in.
+template <class ELFT>
+static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
+                                     const ELFFile<ELFT> &ElfFile) {
+  using Elf_Hash = typename ELFT::Hash;
+  using Elf_GnuHash = typename ELFT::GnuHash;
+  // Search GNU hash table to try to find the upper bound of dynsym.
+  if (Dyn.GnuHash.hasValue()) {
+    Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.GnuHash);
+    if (!TablePtr)
+      return TablePtr.takeError();
+    const Elf_GnuHash *Table =
+        reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
+    return getDynSymtabSize<ELFT>(*Table);
+  }
+  // Search SYSV hash table to try to find the upper bound of dynsym.
+  if (Dyn.ElfHash.hasValue()) {
+    Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.ElfHash);
+    if (!TablePtr)
+      return TablePtr.takeError();
+    const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
+    return Table->nchain;
+  }
+  return 0;
+}
+
+/// This function extracts symbol type from a symbol's st_info member and
+/// maps it to an ELFSymbolType enum.
+/// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported.
+/// Other symbol types are mapped to ELFSymbolType::Unknown.
+///
+/// @param Info Binary symbol st_info to extract symbol type from.
+static ELFSymbolType convertInfoToType(uint8_t Info) {
+  Info = Info & 0xf;
+  switch (Info) {
+  case ELF::STT_NOTYPE:
+    return ELFSymbolType::NoType;
+  case ELF::STT_OBJECT:
+    return ELFSymbolType::Object;
+  case ELF::STT_FUNC:
+    return ELFSymbolType::Func;
+  case ELF::STT_TLS:
+    return ELFSymbolType::TLS;
+  default:
+    return ELFSymbolType::Unknown;
+  }
+}
+
+/// This function creates an ELFSymbol and populates all members using
+/// information from a binary ELFT::Sym.
+///
+/// @param SymName The desired name of the ELFSymbol.
+/// @param RawSym ELFT::Sym to extract symbol information from.
+template <class ELFT>
+static ELFSymbol createELFSym(StringRef SymName,
+                              const typename ELFT::Sym &RawSym) {
+  ELFSymbol TargetSym(SymName);
+  uint8_t Binding = RawSym.getBinding();
+  if (Binding == STB_WEAK)
+    TargetSym.Weak = true;
+  else
+    TargetSym.Weak = false;
+
+  TargetSym.Undefined = RawSym.isUndefined();
+  TargetSym.Type = convertInfoToType(RawSym.st_info);
+
+  if (TargetSym.Type == ELFSymbolType::Func) {
+    TargetSym.Size = 0;
+  } else {
+    TargetSym.Size = RawSym.st_size;
+  }
+  return TargetSym;
+}
+
+/// This function populates an ELFStub with symbols using information read
+/// from an ELF binary.
+///
+/// @param TargetStub ELFStub to add symbols to.
+/// @param DynSym Range of dynamic symbols to add to TargetStub.
+/// @param DynStr StringRef to the dynamic string table.
+template <class ELFT>
+static Error populateSymbols(ELFStub &TargetStub,
+                             const typename ELFT::SymRange DynSym,
+                             StringRef DynStr) {
+  // Skips the first symbol since it's the NULL symbol.
+  for (auto RawSym : DynSym.drop_front(1)) {
+    // If a symbol does not have global or weak binding, ignore it.
+    uint8_t Binding = RawSym.getBinding();
+    if (!(Binding == STB_GLOBAL || Binding == STB_WEAK))
+      continue;
+    // If a symbol doesn't have default or protected visibility, ignore it.
+    uint8_t Visibility = RawSym.getVisibility();
+    if (!(Visibility == STV_DEFAULT || Visibility == STV_PROTECTED))
+      continue;
+    // Create an ELFSymbol and populate it with information from the symbol
+    // table entry.
+    Expected<StringRef> SymName = terminatedSubstr(DynStr, RawSym.st_name);
+    if (!SymName)
+      return SymName.takeError();
+    ELFSymbol Sym = createELFSym<ELFT>(*SymName, RawSym);
+    TargetStub.Symbols.insert(std::move(Sym));
+    // TODO: Populate symbol warning.
+  }
+  return Error::success();
+}
+
 /// Returns a new ELFStub with all members populated from an ELFObjectFile.
 /// @param ElfObj Source ELFObjectFile.
 template <class ELFT>
@@ -133,6 +288,8 @@
 buildStub(const ELFObjectFile<ELFT> &ElfObj) {
   using Elf_Dyn_Range = typename ELFT::DynRange;
   using Elf_Phdr_Range = typename ELFT::PhdrRange;
+  using Elf_Sym_Range = typename ELFT::SymRange;
+  using Elf_Sym = typename ELFT::Sym;
   std::unique_ptr<ELFStub> DestStub = make_unique<ELFStub>();
   const ELFFile<ELFT> *ElfFile = ElfObj.getELFFile();
   // Fetch .dynamic table.
@@ -152,7 +309,7 @@
   if (Error Err = populateDynamic<ELFT>(DynEnt, *DynTable))
     return std::move(Err);
 
-  // Convert .dynstr address to an offset.
+  // Get pointer to in-memory location of .dynstr section.
   Expected<const uint8_t *> DynStrPtr =
       ElfFile->toMappedAddr(DynEnt.StrTabAddr);
   if (!DynStrPtr)
@@ -185,7 +342,25 @@
     DestStub->NeededLibs.push_back(*LibNameOrErr);
   }
 
-  // TODO: Populate Symbols from .dynsym table and linked string table.
+  // Populate Symbols from .dynsym table and dynamic string table.
+  Expected<uint64_t> SymCount = getNumSyms(DynEnt, *ElfFile);
+  if (!SymCount)
+    return SymCount.takeError();
+  if (*SymCount > 0) {
+    // Get pointer to in-memory location of .dynsym section.
+    Expected<const uint8_t *> DynSymPtr =
+        ElfFile->toMappedAddr(DynEnt.DynSymAddr);
+    if (!DynSymPtr)
+      return appendToError(DynSymPtr.takeError(),
+                           "when locating .dynsym section contents");
+    Elf_Sym_Range DynSyms =
+        ArrayRef<Elf_Sym>(reinterpret_cast<const Elf_Sym *>(*DynSymPtr),
+                          *SymCount);
+    Error SymReadError = populateSymbols<ELFT>(*DestStub, DynSyms, DynStr);
+    if (SymReadError)
+      return appendToError(std::move(SymReadError),
+                           "when reading dynamic symbols");
+  }
 
   return std::move(DestStub);
 }