Teach llvm-symbolizer about big-endian PowerPC64 ELF function descriptors.

On this target, symbols that live in the .opd section refer to function
descriptors rather than to code. This patch:
  * adds DataExtractor::isValidOffsetForAddress(), a bounds check for reading
    one address-sized value;
  * makes ModuleInfo's constructor look for a section named ".opd" when the
    module's architecture is Triple::ppc64 and wrap its contents in a
    DataExtractor;
  * makes ModuleInfo::addSymbol() replace the address of a symbol that falls
    inside .opd with the code address stored in the descriptor's first word;
  * adds a lit test, test/tools/llvm-symbolizer/ppc64.test. The binary it
    reads (Inputs/ppc64) is not part of the visible text diff.

NOTE(review): this text was recovered from a whitespace-collapsed copy in which
angle-bracketed tokens had been stripped. The template arguments were restored
from the surrounding code. The two system header names in the LLVMSymbolize.h
hunk (<map>, <memory>) are assumed to match upstream -- confirm before
applying.

Index: include/llvm/Support/DataExtractor.h
===================================================================
--- include/llvm/Support/DataExtractor.h
+++ include/llvm/Support/DataExtractor.h
@@ -348,6 +348,17 @@
   bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const {
     return offset + length >= offset && isValidOffset(offset + length - 1);
   }
+
+  /// Test the availability of enough bytes of data for a pointer from
+  /// \a offset. The size of a pointer is \a getAddressSize().
+  ///
+  /// @return
+  ///     \b true if \a offset is a valid offset and there are enough
+  ///     bytes for a pointer available at that offset, \b false
+  ///     otherwise.
+  bool isValidOffsetForAddress(uint32_t offset) const {
+    return isValidOffsetForDataOfSize(offset, AddressSize);
+  }
 };
 
 } // namespace llvm
Index: test/tools/llvm-symbolizer/ppc64.test
===================================================================
--- /dev/null
+++ test/tools/llvm-symbolizer/ppc64.test
@@ -0,0 +1,11 @@
+// ppc64 was compiled from this source on a big-endian 64-bit PowerPC box
+// with just "clang -nostdlib":
+int foo() { return 0; }
+int bar() { return foo(); }
+int _start() { return bar(); }
+
+RUN: ( echo 0x1000014c ; echo 0x1000018c ; echo 0x100001cc ) | llvm-symbolizer -obj=%p/Inputs/ppc64 | FileCheck %s
+
+CHECK: foo
+CHECK: bar
+CHECK: _start
Index: tools/llvm-symbolizer/LLVMSymbolize.h
===================================================================
--- tools/llvm-symbolizer/LLVMSymbolize.h
+++ tools/llvm-symbolizer/LLVMSymbolize.h
@@ -17,6 +17,7 @@
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <map>
 #include <memory>
@@ -115,7 +116,11 @@
   bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
                               std::string &Name, uint64_t &Addr,
                               uint64_t &Size) const;
-  void addSymbol(const SymbolRef &Symbol);
+  // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
+  // (function descriptor) section and OpdExtractor refers to its contents.
+  void addSymbol(const SymbolRef &Symbol,
+                 DataExtractor *OpdExtractor = nullptr,
+                 uint64_t OpdAddress = 0);
   ObjectFile *Module;
   std::unique_ptr<DIContext> DebugInfoContext;
 
Index: tools/llvm-symbolizer/LLVMSymbolize.cpp
===================================================================
--- tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -45,8 +45,26 @@
 
 ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
     : Module(Obj), DebugInfoContext(DICtx) {
+  std::unique_ptr<DataExtractor> OpdExtractor;
+  uint64_t OpdAddress = 0;
+  // Find the .opd (function descriptor) section if any, for big-endian
+  // PowerPC64 ELF.
+  if (Module->getArch() == Triple::ppc64) {
+    for (section_iterator Section : Module->sections()) {
+      StringRef Name;
+      if (!error(Section->getName(Name)) && Name == ".opd") {
+        StringRef Data;
+        if (!error(Section->getContents(Data))) {
+          OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(),
+                                               Module->getBytesInAddress()));
+          OpdAddress = Section->getAddress();
+        }
+        break;
+      }
+    }
+  }
   for (const SymbolRef &Symbol : Module->symbols()) {
-    addSymbol(Symbol);
+    addSymbol(Symbol, OpdExtractor.get(), OpdAddress);
   }
   bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end());
   if (NoSymbolTable && Module->isELF()) {
@@ -54,12 +72,13 @@
     std::pair<symbol_iterator, symbol_iterator> IDyn =
         getELFDynamicSymbolIterators(Module);
     for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) {
-      addSymbol(*si);
+      addSymbol(*si, OpdExtractor.get(), OpdAddress);
     }
   }
 }
 
-void ModuleInfo::addSymbol(const SymbolRef &Symbol) {
+void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
+                           uint64_t OpdAddress) {
   SymbolRef::Type SymbolType;
   if (error(Symbol.getType(SymbolType)))
     return;
@@ -69,6 +88,18 @@
   if (error(Symbol.getAddress(SymbolAddress)) ||
       SymbolAddress == UnknownAddressOrSize)
     return;
+  if (OpdExtractor) {
+    // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
+    // function descriptors. The first word of the descriptor is a pointer to
+    // the function's code.
+    // For the purposes of symbolization, pretend the symbol's address is that
+    // of the function's code, not the descriptor.
+    uint64_t OpdOffset = SymbolAddress - OpdAddress;
+    uint32_t OpdOffset32 = OpdOffset;
+    if (OpdOffset == OpdOffset32 &&
+        OpdExtractor->isValidOffsetForAddress(OpdOffset32))
+      SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
+  }
   uint64_t SymbolSize;
   // Getting symbol size is linear for Mach-O files, so assume that symbol
   // occupies the memory range up to the following symbol.