diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -124,20 +124,36 @@
                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
                                       raw_ostream &CStream) const = 0;
 
-  /// May parse any prelude that precedes instructions after the start of a
-  /// symbol. Needed for some targets, e.g. WebAssembly.
+  /// Used to perform separate target specific disassembly for a particular
+  /// symbol. May parse any prelude that precedes instructions after the
+  /// start of a symbol, or the entire symbol.
+  /// This is used, for example, by WebAssembly to decode preludes.
   ///
-  /// \param Name - The name of the symbol.
+  /// The base implementation returns None, so by default targets do not
+  /// treat symbols separately.
+  ///
+  /// \param Symbol - The symbol.
   /// \param Size - The number of bytes consumed.
   /// \param Address - The address, in the memory space of region, of the first
   ///                  byte of the symbol.
   /// \param Bytes - A reference to the actual bytes at the symbol location.
   /// \param CStream - The stream to print comments and annotations on.
-  /// \return - MCDisassembler::Success if the bytes are valid,
-  ///           MCDisassembler::Fail if the bytes were invalid.
-  virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
-                                     ArrayRef<uint8_t> Bytes, uint64_t Address,
-                                     raw_ostream &CStream) const;
+  /// \return - MCDisassembler::Success if the bytes are decoded
+  ///           successfully. Size must hold the number of bytes decoded.
+  ///         - MCDisassembler::Fail if the bytes are invalid.
+  ///         - None if the target doesn't want to handle the symbol
+  ///           separately. Set Size to 0 in this case.
+  virtual Optional<DecodeStatus>
+  onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
+                uint64_t Address, raw_ostream &CStream) const;
+  // TODO:
+  // Implement similar hooks that can be used at other points during
+  // disassembly. Something along the following lines:
+  // - onBeforeInstructionDecode()
+  // - onAfterInstructionDecode()
+  // - onSymbolEnd()
+  // It should help move much of the target specific code from llvm-objdump to
+  // respective target disassemblers.
 
 private:
   MCContext &Ctx;
@@ -148,7 +164,7 @@
   std::unique_ptr<MCSymbolizer> Symbolizer;
 
 public:
-  // Helpers around MCSymbolizer
+  // Helpers around MCSymbolizer.
   bool tryAddingSymbolicOperand(MCInst &Inst,
                                 int64_t Value,
                                 uint64_t Address, bool IsBranch,
diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
--- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -16,12 +16,12 @@
 
 MCDisassembler::~MCDisassembler() = default;
 
-MCDisassembler::DecodeStatus
-MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size,
+Optional<MCDisassembler::DecodeStatus>
+MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &CStream) const {
   Size = 0;
-  return MCDisassembler::Success;
+  return None; // Ignore
 }
 
 bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -46,9 +46,10 @@
   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &CStream) const override;
-  DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
-                             ArrayRef<uint8_t> Bytes, uint64_t Address,
-                             raw_ostream &CStream) const override;
+  Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+                                       ArrayRef<uint8_t> Bytes,
+                                       uint64_t Address,
+                                       raw_ostream &CStream) const override;
 
 public:
   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -120,29 +121,29 @@
   return true;
 }
 
-MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
-    StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
-    raw_ostream &CStream) const {
+Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
+    SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
+    uint64_t Address, raw_ostream &CStream) const {
   Size = 0;
   if (Address == 0) {
     // Start of a code section: we're parsing only the function count.
     int64_t FunctionCount;
     if (!nextLEB(FunctionCount, Bytes, Size, false))
-      return MCDisassembler::Fail;
+      return None; // Ignore
     outs() << " # " << FunctionCount << " functions in section.";
   } else {
     // Parse the start of a single function.
     int64_t BodySize, LocalEntryCount;
     if (!nextLEB(BodySize, Bytes, Size, false) ||
         !nextLEB(LocalEntryCount, Bytes, Size, false))
-      return MCDisassembler::Fail;
+      return None; // Ignore
     if (LocalEntryCount) {
       outs() << " .local ";
       for (int64_t I = 0; I < LocalEntryCount; I++) {
         int64_t Count, Type;
         if (!nextLEB(Count, Bytes, Size, false) ||
             !nextLEB(Type, Bytes, Size, false))
-          return MCDisassembler::Fail;
+          return None; // Ignore
         for (int64_t J = 0; J < Count; J++) {
           if (I || J)
             outs() << ", ";
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1427,10 +1427,25 @@
         continue;
       }
 
-      // Some targets (like WebAssembly) have a special prelude at the start
-      // of each symbol.
-      DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
-                            SectionAddr + Start, CommentStream);
+      auto Status = DisAsm->onSymbolStart(Symbols[SI], Size,
+                                          Bytes.slice(Start, End - Start),
+                                          SectionAddr + Start, CommentStream);
+
+      // Only fall back to normal flow if the symbol is decoded partially or
+      // when the target ignores it for special treatment. Right now, most
+      // targets return None, i.e. decline to treat a symbol separately, but
+      // WebAssembly decodes preludes for some symbols.
+
+      if (Status.hasValue()) {
+        // Check whether we failed (error in decoding) or decoded all bytes in
+        // the symbol, i.e. the whole [Start, End) slice passed to the hook.
+        if (Size == End - Start || Status.getValue() == MCDisassembler::Fail) {
+          if (Status.getValue() == MCDisassembler::Fail)
+            outs() << "Error in decoding " << SymbolName << "\n";
+          continue;
+        }
+      }
+
       Start += Size;
 
       Index = Start;