diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-all-mapping-symbols.s b/llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-all-mapping-symbols.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-all-mapping-symbols.s @@ -0,0 +1,32 @@ +// Regression test for a bug in which --disassemble-all had the side effect +// of stopping mapping symbols from being checked in code sections, so that +// mixed Arm/Thumb code would not all be correctly disassembled. + +@ RUN: llvm-mc -triple arm-unknown-linux -filetype=obj %s -o %t.o +@ RUN: llvm-objdump -d %t.o | FileCheck %s +@ RUN: llvm-objdump -d --disassemble-all %t.o | FileCheck %s + +@ CHECK: 00000000 <armfunc>: +@ CHECK: 0: e2800001 add r0, r0, #1 +@ CHECK: 4: e12fff1e bx lr +@ +@ CHECK: 00000008 <thmfunc>: +@ CHECK: 8: f100 0001 add.w r0, r0, #1 +@ CHECK: c: 4770 bx lr + + .arch armv8a + .text + + .arm + .global armfunc + .type armfunc, %function +armfunc: + add r0, r0, #1 + bx lr + + .thumb + .global thmfunc + .type thmfunc, %function +thmfunc: + add r0, r0, #1 + bx lr diff --git a/llvm/test/tools/llvm-objdump/ELF/data-vs-code-priority.s b/llvm/test/tools/llvm-objdump/ELF/data-vs-code-priority.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/data-vs-code-priority.s @@ -0,0 +1,66 @@ +@ REQUIRES: arm-registered-target + +// Test that code symbols take priority over data symbols if both are +// defined at the same address during disassembly. +// +// In the past, llvm-objdump would select the alphabetically last +// symbol at each address. To demonstrate that it's now choosing by +// symbol type, we define pairs of code and data symbols at the same +// address in such a way that the code symbol and data symbol each +// have a chance to appear alphabetically last. Also, we test that +// both STT_FUNC and STT_NOTYPE are regarded as code symbols. 
+ +@ RUN: llvm-mc -triple armv8a-unknown-linux -filetype=obj %s -o %t.o +@ RUN: llvm-objdump --triple armv8a -d %t.o | FileCheck %s + +// Ensure that all four instructions in the section are disassembled +// rather than dumped as data, and that in each case, the code symbol +// is displayed before the disassembly, and not the data symbol at the +// same address. + +@ CHECK: Disassembly of section .text: +@ CHECK-EMPTY: +@ CHECK-NEXT: <A1function>: +@ CHECK-NEXT: movw r0, #1 +@ CHECK-EMPTY: +@ CHECK-NEXT: <B2function>: +@ CHECK-NEXT: movw r0, #2 +@ CHECK-EMPTY: +@ CHECK-NEXT: <A3notype>: +@ CHECK-NEXT: movw r0, #3 +@ CHECK-EMPTY: +@ CHECK-NEXT: <B4notype>: +@ CHECK-NEXT: movw r0, #4 + +.text + +.globl A1function +.globl B2function +.globl A3notype +.globl B4notype +.globl B1object +.globl A2object +.globl B3object +.globl A4object + +.type A1function,%function +.type B2function,%function +.type A3notype,%notype +.type B4notype,%notype +.type B1object,%object +.type A2object,%object +.type B3object,%object +.type A4object,%object + +A1function: +B1object: + movw r0, #1 +A2object: +B2function: + movw r0, #2 +A3notype: +B3object: + movw r0, #3 +A4object: +B4notype: + movw r0, #4 diff --git a/llvm/test/tools/llvm-objdump/multiple-symbols-mangling.s b/llvm/test/tools/llvm-objdump/multiple-symbols-mangling.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/multiple-symbols-mangling.s @@ -0,0 +1,42 @@ +// This test demonstrates that the alphabetical-order tie breaking between +// multiple symbols defined at the same address is based on the raw symbol +// name, not its demangled version. 
+ +@ REQUIRES: arm-registered-target + +@ RUN: llvm-mc -triple armv8a-unknown-linux -filetype=obj %s -o %t.o + +// All the run lines below should generate some subset of this +// display, with different parts included: + +@ COMMON: Disassembly of section .text: +@ +@ RAW-B: 00000000 <_Z4bbbbv>: +@ NICE-B: 00000000 <bbbb()>: +@ NO-B-NOT: bbbb +@ A: 00000000 <aaaa>: +@ COMMON: 0: e0800080 add r0, r0, r0, lsl #1 +@ COMMON: 4: e12fff1e bx lr + +// The default disassembly chooses just the alphabetically later symbol, which +// is aaaa, because the leading _ on a mangled name sorts before lowercase +// ASCII. + +@ RUN: llvm-objdump --triple armv8a -d %t.o | FileCheck --check-prefixes=COMMON,NO-B,A %s + +// With the --show-all-symbols option, bbbb is also shown, in its raw form. + +@ RUN: llvm-objdump --triple armv8a --show-all-symbols -d %t.o | FileCheck --check-prefixes=COMMON,RAW-B,A %s + +// With --demangle as well, bbbb is demangled, but that doesn't change its +// place in the sorting order. + +@ RUN: llvm-objdump --triple armv8a --show-all-symbols --demangle -d %t.o | FileCheck --check-prefixes=COMMON,NICE-B,A %s + +.text +.globl aaaa +.globl _Z4bbbbv +aaaa: +_Z4bbbbv: + add r0, r0, r0, lsl #1 + bx lr diff --git a/llvm/test/tools/llvm-objdump/multiple-symbols.s b/llvm/test/tools/llvm-objdump/multiple-symbols.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/multiple-symbols.s @@ -0,0 +1,97 @@ +// This test checks the behavior of llvm-objdump's --disassemble-symbols and +// --show-all-symbols options, in the presence of multiple symbols defined at +// the same address in an object file. + +// The test input file contains an Arm and a Thumb function, each with two +// function-type symbols defined at its entry point. Also, because it's Arm, +// there's a $a mapping symbol defined at the start of the section, and a $t +// mapping symbol at the point where Arm code stops and Thumb code begins. 
+ +// By default, llvm-objdump will pick one of the symbols to disassemble at each +// point where any are defined at all. The tie-break sorting criterion is +// alphabetic, so it will be the alphabetically later symbol in each case: of +// the names aaaa and bbbb for the Arm function it picks bbbb, and of cccc and +// dddd for the Thumb function it picks dddd. + +// Including an Arm and a Thumb function also re-checks that these changes to +// the display of symbols don't affect the recognition of mapping symbols for +// the purpose of switching disassembly mode. + +@ REQUIRES: arm-registered-target + +@ RUN: llvm-mc -triple armv8a-unknown-linux -filetype=obj %s -o %t.o + +// All the run lines below should generate some subset of this +// display, with different parts included: + +@ HEAD: Disassembly of section .text: +@ HEAD-EMPTY: +@ AMAP-NEXT: 00000000 <$a.0>: +@ AAAA-NEXT: 00000000 <aaaa>: +@ BBBB-NEXT: 00000000 <bbbb>: +@ AABB-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 +@ AABB-NEXT: 4: e12fff1e bx lr +@ BOTH-EMPTY: +@ TMAP-NEXT: 00000008 <$t.1>: +@ CCCC-NEXT: 00000008 <cccc>: +@ DDDD-NEXT: 00000008 <dddd>: +@ CCDD-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 +@ CCDD-NEXT: c: 4770 bx lr + +// The default disassembly chooses just the alphabetically later symbol of each +// set, namely bbbb and dddd. + +@ RUN: llvm-objdump --triple armv8a -d %t.o | FileCheck --check-prefixes=HEAD,BBBB,AABB,BOTH,DDDD,CCDD %s + +// With the --show-all-symbols option, all the symbols are shown, including the +// administrative mapping symbols. + +@ RUN: llvm-objdump --triple armv8a --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB,BOTH,TMAP,CCCC,DDDD,CCDD %s + +// If we use --disassemble-symbols to ask for the disassembly of aaaa or bbbb +// or both, then we expect the second cccc/dddd function not to appear in the +// output at all. Also, we want to see whichever symbol we asked about, or both +// if we asked about both. 
+ +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa -d %t.o | FileCheck --check-prefixes=HEAD,AAAA,AABB %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=bbbb -d %t.o | FileCheck --check-prefixes=HEAD,BBBB,AABB %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa,bbbb -d %t.o | FileCheck --check-prefixes=HEAD,AAAA,BBBB,AABB %s + +// With _any_ of those three options and also --show-all-symbols, the +// disassembled code is still limited to just the symbol(s) you asked about, +// but all symbols defined at the same address are mentioned, whether you asked +// about them or not. + +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=bbbb --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa,bbbb --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB %s + +// Similarly for the Thumb function and its symbols. This time we must check +// that the aaaa/bbbb block of code was not disassembled _before_ the output +// we're expecting. 
+ +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc -d %t.o | FileCheck --check-prefixes=HEAD,CCCC,CCDD %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=dddd -d %t.o | FileCheck --check-prefixes=HEAD,DDDD,CCDD %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc,dddd -d %t.o | FileCheck --check-prefixes=HEAD,CCCC,DDDD,CCDD %s + +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,TMAP,CCCC,DDDD,CCDD %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=dddd --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,TMAP,CCCC,DDDD,CCDD %s +@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc,dddd --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,TMAP,CCCC,DDDD,CCDD %s + +.text +.globl aaaa +.globl bbbb +.globl cccc +.globl dddd + +.arm +aaaa: +bbbb: + add r0, r0, r0, lsl #1 + bx lr + +.thumb +cccc: +dddd: + add.w r0, r0, r0, lsl #2 + bx lr diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td --- a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -153,6 +153,10 @@ def : Flag<["-"], "h">, Alias, HelpText<"Alias for --section-headers">; +def show_all_symbols : Flag<["--"], "show-all-symbols">, + HelpText<"Show all symbols during disassembly, even if multiple " + "symbols are defined at the same location">; + def show_lma : Flag<["--"], "show-lma">, HelpText<"Display LMA column when dumping ELF section headers">; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -207,6 +207,7 @@ bool objdump::PrivateHeaders; std::vector objdump::FilterSections; bool objdump::SectionHeaders; +static bool ShowAllSymbols; static bool ShowLMA; bool objdump::PrintSource; @@ -1481,28 +1482,118 @@ std::vector Rels = RelocMap[Section]; 
std::vector::const_iterator RelCur = Rels.begin(); std::vector::const_iterator RelEnd = Rels.end(); - // Disassemble symbol by symbol. - for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) { - std::string SymbolName = Symbols[SI].Name.str(); - if (Demangle) - SymbolName = demangle(SymbolName); - - // Skip if --disassemble-symbols is not empty and the symbol is not in - // the list. - if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName)) - continue; + // Loop over each chunk of code between two points where at least + // one symbol is defined. + for (size_t SI = 0, SE = Symbols.size(); SI != SE;) { + // Advance SI past all the symbols starting at the same address, + // and make an ArrayRef of them. + unsigned FirstSI = SI; uint64_t Start = Symbols[SI].Addr; + ArrayRef SymbolsHere; + while (SI != SE && Symbols[SI].Addr == Start) + ++SI; + SymbolsHere = ArrayRef(&Symbols[FirstSI], SI - FirstSI); + + // Get the demangled names of all those symbols. We end up with a vector + // of StringRef that holds the names we're going to use, and a vector of + // std::string that stores the new strings returned by demangle(), if + // any. If we don't call demangle() then that vector can stay empty. + std::vector SymNamesHere; + std::vector DemangledSymNamesHere; + if (Demangle) { + // Fetch the demangled names and store them locally. + for (const SymbolInfoTy &Symbol : SymbolsHere) + DemangledSymNamesHere.push_back(demangle(Symbol.Name.str())); + // Now we've finished modifying that vector, it's safe to make + // a vector of StringRefs pointing into it. + SymNamesHere.insert(SymNamesHere.begin(), DemangledSymNamesHere.begin(), + DemangledSymNamesHere.end()); + } else { + for (const SymbolInfoTy &Symbol : SymbolsHere) + SymNamesHere.push_back(Symbol.Name); + } + + // Distinguish ELF data from code symbols, which will be used later on to + // decide whether to 'disassemble' this chunk as a data declaration via + // dumpELFData(), or whether to treat it as code. 
+ // + // If data _and_ code symbols are defined at the same address, the code + // takes priority, on the grounds that disassembling code is our main + // purpose here, and it would be a worse failure to _not_ interpret + // something that _was_ meaningful as code than vice versa. + // + // Any ELF symbol type that is not clearly data will be regarded as code. + // In particular, one of the uses of STT_NOTYPE is for branch targets + // inside functions, for which STT_FUNC would be inaccurate. + // + // So here, we spot whether there's any non-data symbol present at all, + // and only set the DisassembleAsData flag if there isn't. Also, we use + // this distinction to inform the decision of which symbol to print at + // the head of the section, so that if we're printing code, we print a + // code-related symbol name to go with it. + bool DisassembleAsData = false; + size_t DisplaySymIndex = SymbolsHere.size() - 1; + if (Obj.isELF() && !DisassembleAll && Section.isText()) { + DisassembleAsData = true; // unless we find a code symbol below + + for (size_t i = 0; i < SymbolsHere.size(); ++i) { + uint8_t SymTy = SymbolsHere[i].Type; + if (SymTy != ELF::STT_OBJECT && SymTy != ELF::STT_COMMON) { + DisassembleAsData = false; + DisplaySymIndex = i; + } + } + } + + // Decide which symbol(s) from this collection we're going to print. + std::vector SymsToPrint(SymbolsHere.size(), false); + // If the user has given the --disassemble-symbols option, then we must + // display every symbol in that set, and no others. + if (!DisasmSymbolSet.empty()) { + bool FoundAny = false; + for (size_t i = 0; i < SymbolsHere.size(); ++i) { + if (DisasmSymbolSet.count(SymNamesHere[i])) { + SymsToPrint[i] = true; + FoundAny = true; + } + } + + // And if none of the symbols here is one that the user asked for, skip + // disassembling this entire chunk of code. 
+ if (!FoundAny) + continue; + } else { + // Otherwise, print whichever symbol at this location is last in the + // Symbols array, because that array is pre-sorted in a way intended to + // correlate with priority of which symbol to display. + SymsToPrint[DisplaySymIndex] = true; + } + + // Now that we know we're disassembling this section, override the choice + // of which symbols to display by printing _all_ of them at this address + // if the user asked for all symbols. + // + // That way, '--show-all-symbols --disassemble-symbols=foo' will print + // only the chunk of code headed by 'foo', but also show any other + // symbols defined at that address, such as aliases for 'foo', or the ARM + // mapping symbol preceding its code. + if (ShowAllSymbols) { + for (size_t i = 0; i < SymbolsHere.size(); ++i) + SymsToPrint[i] = true; + } + if (Start < SectionAddr || StopAddress <= Start) continue; - else - FoundDisasmSymbolSet.insert(SymbolName); + + for (size_t i = 0; i < SymbolsHere.size(); ++i) + FoundDisasmSymbolSet.insert(SymNamesHere[i]); // The end is the section end, the beginning of the next symbol, or // --stop-address. uint64_t End = std::min(SectionAddr + SectSize, StopAddress); - if (SI + 1 < SE) - End = std::min(End, Symbols[SI + 1].Addr); + if (SI < SE) + End = std::min(End, Symbols[SI].Addr); if (Start >= End || End <= StartAddress) continue; Start -= SectionAddr; @@ -1517,13 +1608,22 @@ } outs() << '\n'; - if (LeadingAddr) - outs() << format(Is64Bits ? 
"%016" PRIx64 " " : "%08" PRIx64 " ", + SectionAddr + Start + VMAAdjustment); + if (Obj.isXCOFF() && SymbolDescription) { + outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n"; + } else + outs() << '<' << SymbolName << ">:\n"; + } // Don't print raw contents of a virtual section. A virtual section // doesn't have any contents in the file. @@ -1532,57 +1632,67 @@ continue; } - auto Status = DisAsm->onSymbolStart(Symbols[SI], Size, - Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); - // To have round trippable disassembly, we fall back to decoding the - // remaining bytes as instructions. - // - // If there is a failure, we disassemble the failed region as bytes before - // falling back. The target is expected to print nothing in this case. + // See if any of the symbols defined at this location triggers target- + // specific disassembly behavior, e.g. of special descriptors or function + // prelude information. // - // If there is Success or SoftFail i.e no 'real' failure, we go ahead by - // Size bytes before falling back. - // So if the entire symbol is 'eaten' by the target: - // Start += Size // Now Start = End and we will never decode as - // // instructions - // - // Right now, most targets return None i.e ignore to treat a symbol - // separately. But WebAssembly decodes preludes for some symbols. - // - if (Status) { + // We stop this loop at the first symbol that triggers some kind of + // interesting behavior (if any), on the assumption that if two symbols + // defined at the same address trigger two conflicting symbol handlers, + // the object file is probably confused anyway, and it would make even + // less sense to present the output of _both_ handlers, because that + // would describe the same data twice. 
+ for (size_t SHI = 0; SHI < SymbolsHere.size(); ++SHI) { + SymbolInfoTy Symbol = SymbolsHere[SHI]; + + auto Status = + DisAsm->onSymbolStart(Symbol, Size, Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + + if (!Status) { + // If onSymbolStart returns None, that means it didn't trigger any + // interesting handling for this symbol. Try the other symbols + // defined at this address. + continue; + } + if (Status.value() == MCDisassembler::Fail) { - outs() << "// Error in decoding " << SymbolName + // If onSymbolStart returns Fail, that means it identified some kind + // of special data at this address, but wasn't able to disassemble it + // meaningfully. So we fall back to disassembling the failed region + // as bytes, assuming that the target detected the failure before + // printing anything. + // + // Return values Success or SoftFail (i.e no 'real' failure) are + // expected to mean that the target has emitted its own output. + // + // Either way, 'Size' will have been set to the amount of data + // covered by whatever prologue the target identified. So we advance + // our own position to beyond that. Sometimes that will be the entire + // distance to the next symbol, and sometimes it will be just a + // prologue and we should start disassembling instructions from where + // it left off. + outs() << "// Error in decoding " << SymNamesHere[SHI] << " : Decoding failed region as bytes.\n"; for (uint64_t I = 0; I < Size; ++I) { outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) << "\n"; } } - } else { - Size = 0; + Start += Size; + break; } - Start += Size; - Index = Start; if (SectionAddr < StartAddress) Index = std::max(Index, StartAddress - SectionAddr); - // If there is a data/common symbol inside an ELF text section and we are - // only disassembling text (applicable all architectures), we are in a - // situation where we must print the data and not disassemble it. 
- if (Obj.isELF() && !DisassembleAll && Section.isText()) { - uint8_t SymTy = Symbols[SI].Type; - if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) { - dumpELFData(SectionAddr, Index, End, Bytes); - Index = End; - } + if (DisassembleAsData) { + dumpELFData(SectionAddr, Index, End, Bytes); + Index = End; + continue; } - bool CheckARMELFData = hasMappingSymbols(Obj) && - Symbols[SI].Type != ELF::STT_OBJECT && - !DisassembleAll; bool DumpARMELFData = false; formatted_raw_ostream FOS(outs()); @@ -1600,7 +1710,7 @@ // same section. We rely on the markers introduced to understand what // we need to dump. If the data marker is within a function, it is // denoted as a word/short etc. - if (CheckARMELFData) { + if (!MappingSymbols.empty()) { char Kind = getMappingSymbolKind(MappingSymbols, Index); DumpARMELFData = Kind == 'd'; if (SecondarySTI) { @@ -2841,6 +2951,7 @@ PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers); FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ); SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers); + ShowAllSymbols = InputArgs.hasArg(OBJDUMP_show_all_symbols); ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma); PrintSource = InputArgs.hasArg(OBJDUMP_source); parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress);