diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h --- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -53,6 +53,10 @@ ArrayRef AddrInfoOffsets; ArrayRef Files; StringTable StrTab; + /// Users can change the base address of a GSYM file manually when + /// symbolicating which allows clients to do lookups using addresses from a + /// process that loaded the object file at a different base address. + Optional BaseAddressOverride; /// When the GSYM file's endianness doesn't match the host system then /// we must decode all data structures that need to be swapped into /// local storage and set point the ArrayRef objects above to these swapped @@ -200,19 +204,45 @@ /// Gets an address from the address table. /// - /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. + /// Addresses are stored as offsets from the GSYM base address. /// /// \param Index A index into the address table. /// \returns A resolved virtual address for adddress in the address table /// or llvm::None if Index is out of bounds. Optional getAddress(size_t Index) const; + /// Gets the base address of this GSYM file. + /// + /// The base address is built into the GSYM file's header, but the user might + /// want to override the default base address. Fetch the appropriate base + /// address for this GSYM file. + /// + /// \returns The base address of the GSYM file, which will be from the GSYM + /// header unless the user has manually set the base address. + uint64_t getBaseAddress() const { + if (BaseAddressOverride) + return *BaseAddressOverride; + // No override was set, so use the base address from the GSYM header. + return Hdr->BaseAddress; + } + + /// Sets the base address of this GSYM file. + /// + /// All addresses in a GSYM file are based off of the base address from the + /// GSYM header. When symbolicating addresses, the object file is often loaded + /// at a load address that doesn't match the base address in the header. 
If we + /// set the base address, we can relocate all symbols in the GSYM file and + /// then do lookups with load addresses from a process. + /// + /// \param Addr The base address to use instead of the one in the header. + void setBaseAddress(uint64_t Addr) { BaseAddressOverride = Addr; } + protected: /// Get an appropriate address info offsets array. /// /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 - /// byte offsets from the The gsym::Header::BaseAddress. The table is stored + /// byte offsets from the GSYM base address. The table is stored /// internally as a array of bytes that are in the correct endianness. When /// we access this table we must get an array that matches those sizes. This /// templatized helper function is used when accessing address offsets in the @@ -228,7 +258,7 @@ /// Get an appropriate address from the address table. /// /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 - /// byte address offsets from the The gsym::Header::BaseAddress. The table is + /// byte address offsets from the GSYM base address. The table is /// stored internally as a array of bytes that are in the correct endianness. /// In order to extract an address from the address table we must access the /// address offset using the correct size and then add it to the BaseAddress @@ -241,7 +271,7 @@ addressForIndex(size_t Index) const { ArrayRef AIO = getAddrOffsets(); if (Index < AIO.size()) - return AIO[Index] + Hdr->BaseAddress; + return AIO[Index] + getBaseAddress(); return llvm::None; } /// Lookup an address offset in the AddrOffsets table. @@ -250,7 +280,7 @@ /// AddrOffsets table. /// /// \param AddrOffset An address offset, that has already been computed by - /// subtracting the gsym::Header::BaseAddress. + /// subtracting the GSYM base address. /// \returns The matching address offset index. This index will be used to /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 
template @@ -259,7 +289,7 @@ const auto Begin = AIO.begin(); const auto End = AIO.end(); auto Iter = std::lower_bound(Begin, End, AddrOffset); - // Watch for addresses that fall between the gsym::Header::BaseAddress and + // Watch for addresses that fall between the GSYM base address and // the first address offset. if (Iter == Begin && AddrOffset < *Begin) return llvm::None; diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -225,8 +225,9 @@ Expected GsymReader::getAddressIndex(const uint64_t Addr) const { - if (Addr >= Hdr->BaseAddress) { - const uint64_t AddrOffset = Addr - Hdr->BaseAddress; + const uint64_t BaseAddress = getBaseAddress(); + if (Addr >= BaseAddress) { + const uint64_t AddrOffset = Addr - BaseAddress; Optional AddrOffsetIndex; switch (Hdr->AddrOffSize) { case 1: diff --git a/llvm/test/tools/llvm-gsymutil/X86/elf-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/X86/elf-dwarf.yaml --- a/llvm/test/tools/llvm-gsymutil/X86/elf-dwarf.yaml +++ b/llvm/test/tools/llvm-gsymutil/X86/elf-dwarf.yaml @@ -5,20 +5,32 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-gsymutil --convert %t -o %t.gsym 2>&1 | FileCheck %s --check-prefix=CONVERT # RUN: llvm-gsymutil --address=0x400391 --address=0x4004cd %t.gsym 2>&1 | FileCheck %s --check-prefix=ADDR +# RUN: llvm-gsymutil --address=0x800391 --address=0x8004cd --load-address=0x800000 %t.gsym 2>&1 | FileCheck %s --check-prefix=ADDR_BASE # RUN: echo -e "0x400391 %/t.gsym\n0x4004cd %/t.gsym" | llvm-gsymutil --addresses-from-stdin 2>&1 | FileCheck %s --check-prefix=ADDRI --dump-input=always +# RUN: echo -e "0x800391 %/t.gsym\n0x8004cd %/t.gsym" | llvm-gsymutil --load-address=0x800000 --addresses-from-stdin 2>&1 | FileCheck %s --check-prefix=ADDRI_BASE --dump-input=always # RUN: llvm-gsymutil --address=0x400391 --address=0x4004cd --verbose %t.gsym 2>&1 | FileCheck %s --check-prefix=ADDRV 
--dump-input=always -# RUN: llvm-gsymutil --address=0x400391 --address=0x4004cd --verbose %t.gsym 2>&1 | FileCheck %s --check-prefix=ADDRV --dump-input=always +# RUN: llvm-gsymutil --address=0x800391 --address=0x8004cd --load-address=0x800000 --verbose %t.gsym 2>&1 | FileCheck %s --check-prefix=ADDRV_BASE --dump-input=always # RUN: llvm-gsymutil %t.gsym 2>&1 | FileCheck %s --check-prefix=DUMP +# RUN: llvm-gsymutil --load-address=0x800000 %t.gsym 2>&1 | FileCheck %s --check-prefix=DUMP_BASE # ADDR: Looking up addresses in "{{.*\.yaml\.tmp\.gsym}}": # ADDR: 0x0000000000400391: _init # ADDR: 0x00000000004004cd: main @ /tmp/main.cpp:1 +# ADDR_BASE: Looking up addresses in "{{.*\.yaml\.tmp\.gsym}}": +# ADDR_BASE: 0x0000000000800391: _init +# ADDR_BASE: 0x00000000008004cd: main @ /tmp/main.cpp:1 + # ADDRI: 0x0000000000400391: _init # ADDRI-EMPTY: # ADDRI: 0x00000000004004cd: main @ /tmp/main.cpp:1 # ADDRI-EMPTY: +# ADDRI_BASE: 0x0000000000800391: _init +# ADDRI_BASE-EMPTY: +# ADDRI_BASE: 0x00000000008004cd: main @ /tmp/main.cpp:1 +# ADDRI_BASE-EMPTY: + # ADDRV: Looking up addresses in "{{.*\.yaml\.tmp\.gsym}}": # ADDRV: FunctionInfo for 0x0000000000400391: # ADDRV: [0x0000000000400390 - 0x0000000000400390) "_init" @@ -33,6 +45,20 @@ # ADDRV: LookupResult for 0x00000000004004cd: # ADDRV: 0x00000000004004cd: main @ /tmp/main.cpp:1 +# ADDRV_BASE: Looking up addresses in "{{.*\.yaml\.tmp\.gsym}}": +# ADDRV_BASE: FunctionInfo for 0x0000000000800391: +# ADDRV_BASE: [0x0000000000800390 - 0x0000000000800390) "_init" +# ADDRV_BASE: LookupResult for 0x0000000000800391: +# ADDRV_BASE: 0x0000000000800391: _init +# ADDRV_BASE: FunctionInfo for 0x00000000008004cd: +# ADDRV_BASE: [0x00000000008004cd - 0x00000000008004df) "main" +# ADDRV_BASE: LineTable: +# ADDRV_BASE: 0x00000000008004cd /tmp/main.cpp:1 +# ADDRV_BASE: 0x00000000008004d8 /tmp/main.cpp:2 +# ADDRV_BASE: 0x00000000008004dd /tmp/main.cpp:3 +# ADDRV_BASE: LookupResult for 0x00000000008004cd: +# ADDRV_BASE: 0x00000000008004cd: 
main @ /tmp/main.cpp:1 + # CONVERT: Input file: {{.*\.yaml\.tmp}} # CONVERT: Output file (x86_64): {{.*\.yaml\.tmp\.gsym}} # CONVERT: Loaded 1 functions from DWARF. @@ -112,6 +138,81 @@ # DUMP: FunctionInfo @ 0x00000194: [0x00000000004004e0 - 0x0000000000400545) "__libc_csu_init" # DUMP: FunctionInfo @ 0x000001a4: [0x0000000000400550 - 0x0000000000400552) "__libc_csu_fini" # DUMP: FunctionInfo @ 0x000001b4: [0x0000000000400554 - 0x000000000040055d) "_fini" + +# DUMP_BASE: Header: +# DUMP_BASE-NEXT: Magic = 0x4753594d +# DUMP_BASE-NEXT: Version = 0x0001 +# DUMP_BASE-NEXT: AddrOffSize = 0x02 +# DUMP_BASE-NEXT: UUIDSize = 0x14 +# DUMP_BASE-NEXT: BaseAddress = 0x0000000000400000 +# DUMP_BASE-NEXT: NumAddresses = 0x0000000a +# DUMP_BASE-NEXT: StrtabOffset = 0x00000080 +# DUMP_BASE-NEXT: StrtabSize = 0x00000091 +# DUMP_BASE-NEXT: UUID = 0e62be89cad89206110ed1375b618656f32ac906 + +# DUMP_BASE: Address Table: +# DUMP_BASE-NEXT: INDEX OFFSET16 (ADDRESS) +# DUMP_BASE-NEXT: ====== =============================== +# DUMP_BASE-NEXT: [ 0] 0x0390 (0x0000000000800390) +# DUMP_BASE-NEXT: [ 1] 0x03e0 (0x00000000008003e0) +# DUMP_BASE-NEXT: [ 2] 0x0410 (0x0000000000800410) +# DUMP_BASE-NEXT: [ 3] 0x0440 (0x0000000000800440) +# DUMP_BASE-NEXT: [ 4] 0x0480 (0x0000000000800480) +# DUMP_BASE-NEXT: [ 5] 0x04a0 (0x00000000008004a0) +# DUMP_BASE-NEXT: [ 6] 0x04cd (0x00000000008004cd) +# DUMP_BASE-NEXT: [ 7] 0x04e0 (0x00000000008004e0) +# DUMP_BASE-NEXT: [ 8] 0x0550 (0x0000000000800550) +# DUMP_BASE-NEXT: [ 9] 0x0554 (0x0000000000800554) + +# DUMP_BASE: Address Info Offsets: +# DUMP_BASE-NEXT: INDEX Offset +# DUMP_BASE-NEXT: ====== ========== +# DUMP_BASE-NEXT: [ 0] 0x00000114 +# DUMP_BASE-NEXT: [ 1] 0x00000124 +# DUMP_BASE-NEXT: [ 2] 0x00000134 +# DUMP_BASE-NEXT: [ 3] 0x00000144 +# DUMP_BASE-NEXT: [ 4] 0x00000154 +# DUMP_BASE-NEXT: [ 5] 0x00000164 +# DUMP_BASE-NEXT: [ 6] 0x00000174 +# DUMP_BASE-NEXT: [ 7] 0x00000194 +# DUMP_BASE-NEXT: [ 8] 0x000001a4 +# DUMP_BASE-NEXT: [ 9] 0x000001b4 + +# 
DUMP_BASE: Files: +# DUMP_BASE-NEXT: INDEX DIRECTORY BASENAME PATH +# DUMP_BASE-NEXT: ====== ========== ========== ============================== +# DUMP_BASE-NEXT: [ 0] 0x00000000 0x00000000 +# DUMP_BASE-NEXT: [ 1] 0x00000006 0x0000000b /tmp/main.cpp + +# DUMP_BASE: String table: +# DUMP_BASE-NEXT: 0x00000000: "" +# DUMP_BASE-NEXT: 0x00000001: "main" +# DUMP_BASE-NEXT: 0x00000006: "/tmp" +# DUMP_BASE-NEXT: 0x0000000b: "main.cpp" +# DUMP_BASE-NEXT: 0x00000014: "deregister_tm_clones" +# DUMP_BASE-NEXT: 0x00000029: "register_tm_clones" +# DUMP_BASE-NEXT: 0x0000003c: "__do_global_dtors_aux" +# DUMP_BASE-NEXT: 0x00000052: "frame_dummy" +# DUMP_BASE-NEXT: 0x0000005e: "__libc_csu_fini" +# DUMP_BASE-NEXT: 0x0000006e: "_fini" +# DUMP_BASE-NEXT: 0x00000074: "__libc_csu_init" +# DUMP_BASE-NEXT: 0x00000084: "_start" +# DUMP_BASE-NEXT: 0x0000008b: "_init" +# DUMP_BASE: FunctionInfo @ 0x00000114: [0x0000000000800390 - 0x0000000000800390) "_init" +# DUMP_BASE: FunctionInfo @ 0x00000124: [0x00000000008003e0 - 0x00000000008003e0) "_start" +# DUMP_BASE: FunctionInfo @ 0x00000134: [0x0000000000800410 - 0x0000000000800410) "deregister_tm_clones" +# DUMP_BASE: FunctionInfo @ 0x00000144: [0x0000000000800440 - 0x0000000000800440) "register_tm_clones" +# DUMP_BASE: FunctionInfo @ 0x00000154: [0x0000000000800480 - 0x0000000000800480) "__do_global_dtors_aux" +# DUMP_BASE: FunctionInfo @ 0x00000164: [0x00000000008004a0 - 0x00000000008004a0) "frame_dummy" +# DUMP_BASE: FunctionInfo @ 0x00000174: [0x00000000008004cd - 0x00000000008004df) "main" +# DUMP_BASE-NEXT: LineTable: +# DUMP_BASE-NEXT: 0x00000000008004cd /tmp/main.cpp:1 +# DUMP_BASE-NEXT: 0x00000000008004d8 /tmp/main.cpp:2 +# DUMP_BASE-NEXT: 0x00000000008004dd /tmp/main.cpp:3 +# DUMP_BASE: FunctionInfo @ 0x00000194: [0x00000000008004e0 - 0x0000000000800545) "__libc_csu_init" +# DUMP_BASE: FunctionInfo @ 0x000001a4: [0x0000000000800550 - 0x0000000000800552) "__libc_csu_fini" +# DUMP_BASE: FunctionInfo @ 0x000001b4: [0x0000000000800554 - 
0x000000000080055d) "_fini" + --- !ELF FileHeader: Class: ELFCLASS64 diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -115,6 +115,15 @@ cl::value_desc("addr"), cat(LookupOptions)); +static opt + LoadAddress("load-address", + desc("Set the load address of the binary. Any addresses " + "displayed in output will be relocated using the new address."), + cl::value_desc("addr"), cat(GeneralOptions)); + +static alias LoadAddressAlias("l", desc("Alias for --load-address"), + aliasopt(LoadAddress), cl::NotHidden); + static opt LookupAddressesFromStdin( "addresses-from-stdin", desc("Lookup addresses in a GSYM file that are read from stdin\nEach input " @@ -497,6 +506,8 @@ if (!*CurrentGsym) error(GSYMPath, CurrentGsym->takeError()); CurrentGSYMPath = GSYMPath; + if (LoadAddress.getNumOccurrences() > 0) + (**CurrentGsym).setBaseAddress(LoadAddress); } uint64_t Addr; @@ -521,6 +532,10 @@ if (!Gsym) error(GSYMPath, Gsym.takeError()); + // Apply --load-address if it was specified (an explicit 0 is valid). + if (LoadAddress.getNumOccurrences() > 0) + Gsym->setBaseAddress(LoadAddress); + if (LookupAddresses.empty()) { Gsym->dump(outs()); continue;