diff --git a/llvm/test/tools/llvm-objdump/multiple-symbols.test b/llvm/test/tools/llvm-objdump/multiple-symbols.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/multiple-symbols.test @@ -0,0 +1,177 @@ +# RUN: yaml2obj %s -o %t.o +# RUN: llvm-objdump --triple armv8a -d %t.o | FileCheck --check-prefix=DEFAULT %s +# RUN: llvm-objdump --triple armv8a --show-all-symbols -d %t.o | FileCheck --check-prefix=ALLSYMS %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa -d %t.o | FileCheck --check-prefix=AAAA %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=bbbb -d %t.o | FileCheck --check-prefix=BBBB %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa,bbbb -d %t.o | FileCheck --check-prefix=AABB %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa --show-all-symbols -d %t.o | FileCheck --check-prefix=AABB-ALL %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=bbbb --show-all-symbols -d %t.o | FileCheck --check-prefix=AABB-ALL %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa,bbbb --show-all-symbols -d %t.o | FileCheck --check-prefix=AABB-ALL %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc -d %t.o | FileCheck --check-prefix=CCCC %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=dddd -d %t.o | FileCheck --check-prefix=DDDD %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc,dddd -d %t.o | FileCheck --check-prefix=CCDD %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc --show-all-symbols -d %t.o | FileCheck --check-prefix=CCDD-ALL %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=dddd --show-all-symbols -d %t.o | FileCheck --check-prefix=CCDD-ALL %s +# RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc,dddd --show-all-symbols -d %t.o | FileCheck --check-prefix=CCDD-ALL %s + +## This test checks the behavior of llvm-objdump's --disassemble-symbols and +## --show-all-symbols options, in 
the presence of multiple symbols defined at +## the same address in an object file. + +## The test input file contains an Arm and a Thumb function, each with two +## function-type symbols defined at its entry point. Also, because it's Arm, +## there's a $a mapping symbol defined at the start of the section, and a $t +## mapping symbol at the point where Arm code stops and Thumb code begins. + +## By default, llvm-objdump will pick one of the symbols to disassemble at each +## point where any are defined at all. The tie-break sorting criterion is +## alphabetic, so it will be the alphabetically later symbol in each case: of +## the names aaaa,bbbb for the Arm function it picks bbbb, and of cccc,dddd for +## the Thumb function it picks dddd. + +# DEFAULT-NOT: >: +# DEFAULT: 00000000 <bbbb>: +# DEFAULT-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 +# DEFAULT-NEXT: 4: e12fff1e bx lr +# DEFAULT-EMPTY: +# DEFAULT-NEXT: 00000008 <dddd>: +# DEFAULT-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 +# DEFAULT-NEXT: c: 4770 bx lr + +## With the --show-all-symbols option, all the symbols are shown, including the +## administrative mapping symbols. + +# ALLSYMS-NOT: >: +# ALLSYMS: 00000000 <$a.0>: +# ALLSYMS-NEXT: 00000000 <aaaa>: +# ALLSYMS-NEXT: 00000000 <bbbb>: +# ALLSYMS-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 +# ALLSYMS-NEXT: 4: e12fff1e bx lr +# ALLSYMS-EMPTY: +# ALLSYMS-NEXT: 00000008 <$t.1>: +# ALLSYMS-NEXT: 00000008 <cccc>: +# ALLSYMS-NEXT: 00000008 <dddd>: +# ALLSYMS-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 +# ALLSYMS-NEXT: c: 4770 bx lr + +## If you ask for '--disassemble-symbols=aaaa', then the symbol aaaa is singled +## out for display even though it wouldn't be shown by default, because it's +## the one you actually asked for. And display stops after that: we don't move +## on to disassemble the second block of code at all. 
+ +# AAAA-NOT: >: +# AAAA: 00000000 <aaaa>: +# AAAA-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 +# AAAA-NEXT: 4: e12fff1e bx lr +# AAAA-NOT: 8 + +## Similarly, if you ask for '--disassemble-symbols=bbbb', then you see just +## bbbb. (This is the symbol that _would_ have been shown before, of course.) + +# BBBB-NOT: >: +# BBBB: 00000000 <bbbb>: +# BBBB-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 +# BBBB-NEXT: 4: e12fff1e bx lr +# BBBB-NOT: 8 + +## If you ask for both, via '--disassemble-symbols=aaaa,bbbb', then the code is +## only dumped once, but both symbols are shown at its entry point, because +## they're both symbols you expressed an interest in. + +# AABB-NOT: >: +# AABB: 00000000 <aaaa>: +# AABB-NEXT: 00000000 <bbbb>: +# AABB-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 +# AABB-NEXT: 4: e12fff1e bx lr +# AABB-NOT: 8 + +## With _any_ of those three options and also --show-all-symbols, the +## disassembled code is still limited to just the symbol(s) you asked about, +## but all symbols defined at the same address are mentioned, whether you asked +## about them or not. + +# AABB-ALL-NOT: >: +# AABB-ALL: 00000000 <$a.0>: +# AABB-ALL-NEXT: 00000000 <aaaa>: +# AABB-ALL-NEXT: 00000000 <bbbb>: +# AABB-ALL-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 +# AABB-ALL-NEXT: 4: e12fff1e bx lr +# AABB-ALL-NOT: 8 + +## Similarly for the other two functions. This time we must check that the +## aaaa/bbbb block of code was not disassembled _before_ the output we're +## expecting. 
+ +## Asking for just cccc: + +# CCCC-NOT: 0: +# CCCC: 00000008 <cccc>: +# CCCC-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 +# CCCC-NEXT: c: 4770 bx lr + +## Asking for just dddd: + +# DDDD-NOT: 0: +# DDDD: 00000008 <dddd>: +# DDDD-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 +# DDDD-NEXT: c: 4770 bx lr + +## Asking for both: + +# CCDD-NOT: 0: +# CCDD: 00000008 <cccc>: +# CCDD-NEXT: 00000008 <dddd>: +# CCDD-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 +# CCDD-NEXT: c: 4770 bx lr + +## Any of those together with --show-all-symbols: + +# CCDD-ALL-NOT: 0: +# CCDD-ALL: 00000008 <$t.1>: +# CCDD-ALL-NEXT: 00000008 <cccc>: +# CCDD-ALL-NEXT: 00000008 <dddd>: +# CCDD-ALL-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 +# CCDD-ALL-NEXT: c: 4770 bx lr + +## And here's the input object file. + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_ARM + Flags: [ EF_ARM_EABI_VER5 ] +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x4 + Content: 800080E01EFF2FE100EB80007047 +Symbols: + - Name: '$a.0' + Section: .text + Value: 0x0 + - Name: aaaa + Section: .text + Type: STT_FUNC + Binding: STB_GLOBAL + Value: 0x0 + - Name: bbbb + Section: .text + Type: STT_FUNC + Binding: STB_GLOBAL + Value: 0x0 + - Name: '$t.1' + Section: .text + Value: 0x8 + - Name: cccc + Section: .text + Type: STT_FUNC + Binding: STB_GLOBAL + Value: 0x8 + - Name: dddd + Section: .text + Type: STT_FUNC + Binding: STB_GLOBAL + Value: 0x8 diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td --- a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -153,6 +153,10 @@ def : Flag<["-"], "h">, Alias, HelpText<"Alias for --section-headers">; +def show_all_symbols : Flag<["--"], "show-all-symbols">, + HelpText<"Show all symbols during disassembly, even if multiple " + "symbols are defined at the same location">; + def show_lma : Flag<["--"], "show-lma">, HelpText<"Display LMA column when dumping 
ELF section headers">; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -207,6 +207,7 @@ bool objdump::PrivateHeaders; std::vector objdump::FilterSections; bool objdump::SectionHeaders; +static bool ShowAllSymbols; static bool ShowLMA; bool objdump::PrintSource; @@ -1481,28 +1482,79 @@ std::vector Rels = RelocMap[Section]; std::vector::const_iterator RelCur = Rels.begin(); std::vector::const_iterator RelEnd = Rels.end(); - // Disassemble symbol by symbol. - for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) { - std::string SymbolName = Symbols[SI].Name.str(); - if (Demangle) - SymbolName = demangle(SymbolName); - - // Skip if --disassemble-symbols is not empty and the symbol is not in - // the list. - if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName)) - continue; + // Loop over each chunk of code between two points where at least + // one symbol is defined. + for (unsigned SI = 0, SE = Symbols.size(); SI != SE;) { + // Advance SI past all the symbols starting at the same address, + // and make an ArrayRef of them. + unsigned FirstSI = SI; uint64_t Start = Symbols[SI].Addr; + ArrayRef SymbolsHere; + { + while (SI != SE && Symbols[SI].Addr == Start) + SI++; + SymbolsHere = ArrayRef(&Symbols[FirstSI], SI - FirstSI); + } + + // Get the demangled names of all those symbols. + std::vector SymNamesHere; + for (const SymbolInfoTy &Symbol : SymbolsHere) { + StringRef Name = Symbol.Name; + SymNamesHere.push_back(Demangle ? demangle(Name.str()) : Name.str()); + } + + // Decide which symbol(s) from this collection we're going to print. + std::vector SymsToPrint(SymbolsHere.size(), false); + { + // If the user has given the --disassemble-symbols option, then we must + // display every symbol in that set (that we can find at all), and no + // others. 
+ if (!DisasmSymbolSet.empty()) { + bool FoundAny = false; + for (unsigned i = 0; i < SymbolsHere.size(); ++i) { + if (DisasmSymbolSet.count(SymNamesHere[i])) { + SymsToPrint[i] = true; + FoundAny = true; + } + } + + // And if none of the symbols here is one that the user asked for, + // skip disassembling this entire chunk of code. + if (!FoundAny) + continue; + } else { + // Otherwise, print whichever symbol at this location is last in + // the Symbols array, because that array is pre-sorted in a way + // intended to correlate with priority of which symbol to display. + SymsToPrint[SymbolsHere.size() - 1] = true; + } + + // Now that we know we're disassembling this section at all, override + // the choice of which symbols to display by printing _all_ of them at + // this address if the user asked for all symbols. + // + // (That way, '--show-all-symbols --disassemble-symbols=foo' will print + // only the //chunk of code// headed by 'foo', but also show any other + // symbols defined at that address, such as aliases for 'foo', or the + // Arm mapping symbol preceding its code.) + if (ShowAllSymbols) { + for (unsigned i = 0; i < SymbolsHere.size(); ++i) + SymsToPrint[i] = true; + } + } + if (Start < SectionAddr || StopAddress <= Start) continue; - else - FoundDisasmSymbolSet.insert(SymbolName); + + for (unsigned i = 0; i < SymbolsHere.size(); ++i) + FoundDisasmSymbolSet.insert(SymNamesHere[i]); // The end is the section end, the beginning of the next symbol, or // --stop-address. uint64_t End = std::min(SectionAddr + SectSize, StopAddress); - if (SI + 1 < SE) - End = std::min(End, Symbols[SI + 1].Addr); + if (SI < SE) + End = std::min(End, Symbols[SI].Addr); if (Start >= End || End <= StartAddress) continue; Start -= SectionAddr; @@ -1517,13 +1569,22 @@ } outs() << '\n'; - if (LeadingAddr) - outs() << format(Is64Bits ? 
"%016" PRIx64 " " : "%08" PRIx64 " ", - SectionAddr + Start + VMAAdjustment); - if (Obj.isXCOFF() && SymbolDescription) { - outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n"; - } else - outs() << '<' << SymbolName << ">:\n"; + + for (unsigned i = 0; i < SymbolsHere.size(); ++i) { + if (!SymsToPrint[i]) + continue; + + const SymbolInfoTy &Symbol = SymbolsHere[i]; + const std::string &SymbolName = SymNamesHere[i]; + + if (LeadingAddr) + outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", + SectionAddr + Start + VMAAdjustment); + if (Obj.isXCOFF() && SymbolDescription) { + outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n"; + } else + outs() << '<' << SymbolName << ">:\n"; + } // Don't print raw contents of a virtual section. A virtual section // doesn't have any contents in the file. @@ -1532,39 +1593,57 @@ continue; } - auto Status = DisAsm->onSymbolStart(Symbols[SI], Size, - Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); - // To have round trippable disassembly, we fall back to decoding the - // remaining bytes as instructions. - // - // If there is a failure, we disassemble the failed region as bytes before - // falling back. The target is expected to print nothing in this case. - // - // If there is Success or SoftFail i.e no 'real' failure, we go ahead by - // Size bytes before falling back. - // So if the entire symbol is 'eaten' by the target: - // Start += Size // Now Start = End and we will never decode as - // // instructions + // See if any of the symbols defined at this location triggers target- + // specific disassembly behavior, e.g. of special descriptors or function + // prelude information. // - // Right now, most targets return None i.e ignore to treat a symbol - // separately. But WebAssembly decodes preludes for some symbols. 
- // - if (Status) { + // We stop this loop at the first symbol that triggers some kind of + // interesting behavior (if any), on the assumption that if two symbols + // defined at the same address trigger two conflicting symbol handlers, + // the object file is probably confused anyway, and it would make even + // less sense to present the output of _both_ handlers, because that + // would describe the same data twice. + for (unsigned SHI = 0; SHI < SymbolsHere.size(); ++SHI) { + SymbolInfoTy Symbol = SymbolsHere[SHI]; + + auto Status = + DisAsm->onSymbolStart(Symbol, Size, Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + + if (!Status) { + // If onSymbolStart returns None, that means it didn't trigger any + // interesting handling for this symbol. Try the other symbols + // defined at this address. + continue; + } + if (Status.value() == MCDisassembler::Fail) { - outs() << "// Error in decoding " << SymbolName + // If onSymbolStart returns Fail, that means it identified some kind + // of special data at this address, but wasn't able to disassemble it + // meaningfully. So we fall back to disassembling the failed region + // as bytes, assuming that the target detected the failure before + // printing anything. + // + // Return values Success or SoftFail (i.e no 'real' failure) are + // expected to mean that the target has emitted its own output. + // + // Either way, 'Size' will have been set to the amount of data + // covered by whatever prologue the target identified. So we advance + // our own position to beyond that. Sometimes that will be the entire + // distance to the next symbol, and sometimes it will be just a + // prologue and we should start disassembling instructions from where + // it left off. 
+ outs() << "// Error in decoding " << SymNamesHere[SHI] << " : Decoding failed region as bytes.\n"; for (uint64_t I = 0; I < Size; ++I) { outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) << "\n"; } } - } else { - Size = 0; + Start += Size; + break; } - Start += Size; - Index = Start; if (SectionAddr < StartAddress) Index = std::max(Index, StartAddress - SectionAddr); @@ -1573,16 +1652,23 @@ // only disassembling text (applicable all architectures), we are in a // situation where we must print the data and not disassemble it. if (Obj.isELF() && !DisassembleAll && Section.isText()) { - uint8_t SymTy = Symbols[SI].Type; - if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) { - dumpELFData(SectionAddr, Index, End, Bytes); - Index = End; + for (const SymbolInfoTy &Symbol : SymbolsHere) { + uint8_t SymTy = Symbol.Type; + if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) { + dumpELFData(SectionAddr, Index, End, Bytes); + Index = End; + break; + } } } - bool CheckARMELFData = hasMappingSymbols(Obj) && - Symbols[SI].Type != ELF::STT_OBJECT && - !DisassembleAll; + bool CheckARMELFData = false; + if (hasMappingSymbols(Obj) && !DisassembleAll) { + CheckARMELFData = true; + for (const SymbolInfoTy &Symbol : SymbolsHere) + if (Symbol.Type == ELF::STT_OBJECT) + CheckARMELFData = false; + } bool DumpARMELFData = false; formatted_raw_ostream FOS(outs()); @@ -2838,6 +2924,7 @@ PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers); FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ); SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers); + ShowAllSymbols = InputArgs.hasArg(OBJDUMP_show_all_symbols); ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma); PrintSource = InputArgs.hasArg(OBJDUMP_source); parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress);