diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" + #include #include #include @@ -72,7 +73,7 @@ /// array of assembly instructions. class MCDisassembler { public: - /// Ternary decode status. Most backends will just use Fail and + /// Four-valued decode status. Most backends will just use Fail and /// Success, however some have a concept of an instruction with /// understandable semantics but which is architecturally /// incorrect. An example of this is ARM UNPREDICTABLE instructions @@ -82,6 +83,9 @@ /// is a "soft fail" failure mode that indicates the MCInst& is /// valid but architecturally incorrect. /// + /// When disassembling symbols, targets can also return Ignore or SoftFail in + /// case they partially disassembled it. + /// /// The enum numbers are deliberately chosen such that reduction /// from Success->SoftFail ->Fail can be done with a simple /// bitwise-AND: @@ -94,11 +98,7 @@ /// /// An easy way of encoding this is as 0b11, 0b01, 0b00 for /// Success, SoftFail, Fail respectively. - enum DecodeStatus { - Fail = 0, - SoftFail = 1, - Success = 3 - }; + enum DecodeStatus { Fail = 0, SoftFail = 1, Success = 3, Ignore = 4 }; MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : Ctx(Ctx), STI(STI) {} @@ -124,20 +124,38 @@ ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const = 0; - /// May parse any prelude that precedes instructions after the start of a - /// symbol. Needed for some targets, e.g. WebAssembly. + /// Used to perform separate target-specific disassembly for a particular + /// symbol. May parse any prelude that precedes instructions after the + /// start of a symbol, or the entire symbol. 
+ /// This is used for example by AMDGPU to decode kernel descriptors. + /// + /// Base implementation returns + /// MCDisassembler::Ignore. /// - /// \param Name - The name of the symbol. + /// \param Symbol - The symbol. /// \param Size - The number of bytes consumed. /// \param Address - The address, in the memory space of region, of the first /// byte of the symbol. /// \param Bytes - A reference to the actual bytes at the symbol location. /// \param CStream - The stream to print comments and annotations on. - /// \return - MCDisassembler::Success if the bytes are valid, - /// MCDisassembler::Fail if the bytes were invalid. - virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, + /// \return - MCDisassembler::Success if entire symbol is decoded + /// successfully by the target. + /// MCDisassembler::Fail if the bytes are invalid + /// MCDisassembler::SoftFail if the target needs to handle + /// some bytes, i.e. not the entire symbol + /// MCDisassembler::Ignore if the target doesn't want to + /// handle the symbol separately + virtual DecodeStatus onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const; + // TODO: + // Implement similar hooks that can be used at other points during + // disassembly. Something along the following lines: + // - onBeforeInstructionDecode() + // - onAfterInstructionDecode() + // - onSymbolEnd() + // It should help move much of the target-specific code from llvm-objdump to + // respective target disassemblers. private: MCContext &Ctx; @@ -148,7 +166,7 @@ std::unique_ptr Symbolizer; public: - // Helpers around MCSymbolizer + // Helpers around MCSymbolizer. 
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -17,11 +17,11 @@ MCDisassembler::~MCDisassembler() = default; MCDisassembler::DecodeStatus -MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size, +MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { Size = 0; - return MCDisassembler::Success; + return MCDisassembler::Ignore; } bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp --- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -46,7 +46,7 @@ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; - DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, + DecodeStatus onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; @@ -121,28 +121,28 @@ } MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( - StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, - raw_ostream &CStream) const { + SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, + uint64_t Address, raw_ostream &CStream) const { Size = 0; if (Address == 0) { // Start of a code section: we're parsing only the function count. 
int64_t FunctionCount; if (!nextLEB(FunctionCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return MCDisassembler::Ignore; outs() << " # " << FunctionCount << " functions in section."; } else { // Parse the start of a single function. int64_t BodySize, LocalEntryCount; if (!nextLEB(BodySize, Bytes, Size, false) || !nextLEB(LocalEntryCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return MCDisassembler::Ignore; if (LocalEntryCount) { outs() << " .local "; for (int64_t I = 0; I < LocalEntryCount; I++) { int64_t Count, Type; if (!nextLEB(Count, Bytes, Size, false) || !nextLEB(Type, Bytes, Size, false)) - return MCDisassembler::Fail; + return MCDisassembler::Ignore; for (int64_t J = 0; J < Count; J++) { if (I || J) outs() << ", "; @@ -152,7 +152,7 @@ } } outs() << "\n"; - return MCDisassembler::Success; + return MCDisassembler::SoftFail; } MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1396,7 +1396,7 @@ if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) + // Skip amd_kernel_code_t at the start of kernel symbol (256 bytes). Start += 256; } if (SI == SE - 1 || @@ -1405,8 +1405,8 @@ // cut up to 256 bytes const uint64_t EndAlign = 256; const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast(&Bytes[End - 4]) == 0) + while (End > Limit && *reinterpret_cast( + &Bytes[End - 4]) == 0) End -= 4; } } @@ -1427,10 +1427,23 @@ continue; } - // Some targets (like WebAssembly) have a special prelude at the start - // of each symbol. 
- DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); + MCDisassembler::DecodeStatus status = DisAsm->onSymbolStart( + Symbols[SI], Size, Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + + if (status == MCDisassembler::Success || status == MCDisassembler::Fail) { + if (status == MCDisassembler::Fail) + outs() << "Error in decoding " << SymbolName << "\n"; + continue; + } + + // Only fall back to normal flow if the symbol is decoded partially + // (SoftFail) or when the target ignores the symbol for special treatment. + // Right now: + // - WebAssembly decodes preludes (returns SoftFail / Ignore) + // - AMDGPU needs it to decode kernel descriptors + // (returns Success / Fail / Ignore) + Start += Size; Index = Start;