diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -127,8 +127,13 @@ ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const = 0; - /// May parse any prelude that precedes instructions after the start of a - /// symbol. Needed for some targets, e.g. WebAssembly. + /// Used to perform separate target-specific disassembly for a particular + /// symbol. May parse any prelude that precedes instructions after the + /// start of a symbol, or the entire symbol. + /// This is used, for example, by WebAssembly to decode preludes. + /// + /// The base implementation returns None, so by default no target treats + /// symbols separately. /// /// \param Name - The name of the symbol. /// \param Size - The number of bytes consumed. @@ -136,11 +141,27 @@ /// byte of the symbol. /// \param Bytes - A reference to the actual bytes at the symbol location. /// \param CStream - The stream to print comments and annotations on. - /// \return - MCDisassembler::Success if the bytes are valid, - /// MCDisassembler::Fail if the bytes were invalid. - virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, - ArrayRef Bytes, uint64_t Address, - raw_ostream &CStream) const; + /// \return - MCDisassembler::Success if bytes are decoded + /// successfully. Size must hold the number of bytes that + /// were decoded. + /// - MCDisassembler::Fail if the bytes are invalid. Size + /// must hold the number of bytes that were decoded before + /// failing. The target must print nothing. This can be + /// done by buffering the output if needed. + /// - None if the target doesn't want to handle the symbol + /// separately. The value of Size is ignored in this case. 
+ virtual Optional onSymbolStart(StringRef Name, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const; + // TODO: + // Implement similar hooks that can be used at other points during + // disassembly. Something along the following lines: + // - onBeforeInstructionDecode() + // - onAfterInstructionDecode() + // - onSymbolEnd() + // It should help move much of the target specific code from llvm-objdump to + // respective target disassemblers. private: MCContext &Ctx; diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -16,12 +16,11 @@ MCDisassembler::~MCDisassembler() = default; -MCDisassembler::DecodeStatus +Optional MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { - Size = 0; - return MCDisassembler::Success; + return None; } bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp --- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -46,9 +46,10 @@ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; - DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, - ArrayRef Bytes, uint64_t Address, - raw_ostream &CStream) const override; + Optional onSymbolStart(StringRef Name, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const override; public: WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, @@ -120,7 +121,7 @@ return true; } -MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( +Optional 
WebAssemblyDisassembler::onSymbolStart( StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { Size = 0; @@ -128,21 +129,21 @@ // Start of a code section: we're parsing only the function count. int64_t FunctionCount; if (!nextLEB(FunctionCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return None; outs() << " # " << FunctionCount << " functions in section."; } else { // Parse the start of a single function. int64_t BodySize, LocalEntryCount; if (!nextLEB(BodySize, Bytes, Size, false) || !nextLEB(LocalEntryCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return None; if (LocalEntryCount) { outs() << " .local "; for (int64_t I = 0; I < LocalEntryCount; I++) { int64_t Count, Type; if (!nextLEB(Count, Bytes, Size, false) || !nextLEB(Type, Bytes, Size, false)) - return MCDisassembler::Fail; + return None; for (int64_t J = 0; J < Count; J++) { if (I || J) outs() << ", "; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1429,10 +1429,37 @@ continue; } - // Some targets (like WebAssembly) have a special prelude at the start - // of each symbol. - DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); + auto Status = DisAsm->onSymbolStart(SymbolName, Size, + Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + // To have round trippable disassembly, we fall back to decoding the + // remaining bytes as instructions. + // + // If there is a failure, we disassemble the failed region as bytes before + // falling back. The target is expected to print nothing in this case. + // + // If there is Success or SoftFail i.e no 'real' failure, we go ahead by + // Size bytes before falling back. 
+      // So if the entire symbol is 'eaten' by the target:
+      //   Start += Size  // Now Start = End and we will never decode as
+      //                  // instructions
+      //
+      // Right now, most targets return None, i.e. they do not treat a symbol
+      // separately. But WebAssembly decodes preludes for some symbols.
+      // Size is relative to the symbol start, so index Bytes from Start.
+      if (Status.hasValue()) {
+        if (Status.getValue() == MCDisassembler::Fail) {
+          outs() << "// Error in decoding " << SymbolName
+                 << " : Decoding failed region as bytes.\n";
+          for (uint64_t I = 0; I < Size; ++I) {
+            outs() << "\t.byte\t "
+                   << format_hex(Bytes[Start + I], 1, /*Upper=*/true) << "\n";
+          }
+        }
+      } else {
+        Size = 0;
+      }
+
       Start += Size;
 
       Index = Start;