diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -127,20 +127,32 @@ ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const = 0; - /// May parse any prelude that precedes instructions after the start of a - /// symbol. Needed for some targets, e.g. WebAssembly. + /// Used to perform separate target specific disassembly for a particular + /// symbol. May parse any prelude that precedes instructions after the + /// start of a symbol, or the entire symbol. + /// This is used for example by WebAssembly to decode preludes. /// - /// \param Name - The name of the symbol. + /// Base implementation returns None. So all targets by default ignore to + /// treat symbols separately. + /// + /// \param Symbol - The symbol. /// \param Size - The number of bytes consumed. /// \param Address - The address, in the memory space of region, of the first /// byte of the symbol. /// \param Bytes - A reference to the actual bytes at the symbol location. /// \param CStream - The stream to print comments and annotations on. - /// \return - MCDisassembler::Success if the bytes are valid, - /// MCDisassembler::Fail if the bytes were invalid. - virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, - ArrayRef Bytes, uint64_t Address, - raw_ostream &CStream) const; + /// \return - MCDisassembler::Success if bytes are decoded + /// successfully. Size must hold the number of bytes that + /// were decoded. + /// - MCDisassembler::Fail if the bytes are invalid. Size + /// must hold the number of bytes that were decoded before + /// failing. + /// - None if the target doesn't want to handle the symbol + /// separately. Value of Size is ignored in this case. + virtual Optional onSymbolStart(StringRef Name, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const; private: MCContext &Ctx; diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -16,12 +16,12 @@ MCDisassembler::~MCDisassembler() = default; -MCDisassembler::DecodeStatus +Optional MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { Size = 0; - return MCDisassembler::Success; + return None; // Ignore } bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp --- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -46,9 +46,10 @@ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; - DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, - ArrayRef Bytes, uint64_t Address, - raw_ostream &CStream) const override; + Optional onSymbolStart(StringRef Name, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const override; public: WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, @@ -120,7 +121,7 @@ return true; } -MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( +Optional WebAssemblyDisassembler::onSymbolStart( StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { Size = 0; @@ -128,21 +129,21 @@ // Start of a code section: we're parsing only the function count. int64_t FunctionCount; if (!nextLEB(FunctionCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return None; // Ignore outs() << " # " << FunctionCount << " functions in section."; } else { // Parse the start of a single function. int64_t BodySize, LocalEntryCount; if (!nextLEB(BodySize, Bytes, Size, false) || !nextLEB(LocalEntryCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return None; // Ignore if (LocalEntryCount) { outs() << " .local "; for (int64_t I = 0; I < LocalEntryCount; I++) { int64_t Count, Type; if (!nextLEB(Count, Bytes, Size, false) || !nextLEB(Type, Bytes, Size, false)) - return MCDisassembler::Fail; + return None; // Ignore for (int64_t J = 0; J < Count; J++) { if (I || J) outs() << ", "; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1427,10 +1427,42 @@ continue; } - // Some targets (like WebAssembly) have a special prelude at the start - // of each symbol. - DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); + auto Status = DisAsm->onSymbolStart(SymbolName, Size, + Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + // To have round trippable disassembly, we fall back to decoding the + // remaining bytes as instructions. + // + // If there is a failure, we disassemble the failed region as bytes + // before falling back. + // + // -> This means that on failure the target must print whatever it + // disassembled so far as comments? Requires discussion. + // + // -> Otherwise we would have decoded some bytes twice. Once by the + // target (until it failed) and then again as bytes. + + // If there is Success or SoftFail i.e no 'real' failure, we go ahead by + // Size bytes before falling back. + // So if the entire symbol is 'eaten' by the target: + // Start += Size // Now Start = End and we will never decode as + // // instructions + // + // Right now, most targets return None i.e ignore to treat a symbol + // separately. But WebAssembly decodes preludes for some symbols. + // + if (Status.hasValue()) { + if (Status.getValue() == MCDisassembler::Fail) { + outs() << "// Error in decoding " << SymbolName + << " : Decoding failed region as bytes.\n"; + for (int i = 0; i < Size; ++i) { + outs() << "\t.byte\t " << format_hex(Bytes[i], 1, /*Upper=*/true) << "\n"; + } + } + } else { + Size = 0; + } + Start += Size; Index = Start;