diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/Support/Endian.h" #include #include #include @@ -94,11 +95,7 @@ /// /// An easy way of encoding this is as 0b11, 0b01, 0b00 for /// Success, SoftFail, Fail respectively. - enum DecodeStatus { - Fail = 0, - SoftFail = 1, - Success = 3 - }; + enum DecodeStatus { Fail = 0, SoftFail = 1, Success = 3, Ignore = 4 }; MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : Ctx(Ctx), STI(STI) {} @@ -124,8 +121,14 @@ ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const = 0; - /// May parse any prelude that precedes instructions after the start of a - /// symbol. Needed for some targets, e.g. WebAssembly. + /// Used to perform separate target specific disassembly for a + /// particular symbol. May parse any prelude that precedes instructions + /// after the start of a symbol, or the entire symbol. + /// Right now WebAssembly implements this, and AMDGPU needs it to decode + /// kernel descriptors. + /// + /// Base implementation returns + /// MCDisassembler::Ignore /// /// \param Name - The name of the symbol. /// \param Size - The number of bytes consumed. @@ -133,11 +136,24 @@ /// byte of the symbol. /// \param Bytes - A reference to the actual bytes at the symbol location. /// \param CStream - The stream to print comments and annotations on. - /// \return - MCDisassembler::Success if the bytes are valid, - /// MCDisassembler::Fail if the bytes were invalid. - virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, + /// \return - MCDisassembler::Success if entire symbol is decoded + /// successfully by the target. 
+ /// MCDisassembler::Fail if the bytes are invalid + /// MCDisassembler::SoftFail if the target needs to handle + /// some bytes, i.e. not the entire symbol + /// MCDisassembler::Ignore if the target doesn't want to + /// handle the symbol separately + virtual DecodeStatus onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const; + /// TODO: + /// Implement similar hooks that can be used at other points during + /// disassembly. Something along the following lines: + /// - onBeforeInstructionDecode() + /// - onAfterInstructionDecode() + /// - onSymbolEnd() + /// It should help move much of the target specific code from llvm-objdump to + /// respective target disassemblers private: MCContext &Ctx; @@ -148,6 +164,17 @@ std::unique_ptr Symbolizer; public: + // Helpers to read bytes. Each returns false, leaving the output untouched, + // when fewer than the requested bytes remain at index. + bool getWord(support::endianness E, ArrayRef Bytes, size_t index, + uint16_t &word) const; + + bool getDoubleWord(support::endianness E, ArrayRef Bytes, + size_t index, uint32_t &doubleWord) const; + + bool getQuadWord(support::endianness E, ArrayRef Bytes, size_t index, + uint64_t &quadWord) const; + // Helpers around MCSymbolizer bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -17,11 +17,42 @@ MCDisassembler::~MCDisassembler() = default; MCDisassembler::DecodeStatus -MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size, +MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { Size = 0; - return MCDisassembler::Success; + return MCDisassembler::Ignore; +} + +bool MCDisassembler::getWord(support::endianness E, ArrayRef Bytes, + size_t index, uint16_t &word) const { + // Compare against the remaining size so a huge index cannot wrap the check. + if (index > Bytes.size() || Bytes.size() - index < 2) + return false; 
+ // Decode in the byte order requested by E (little, big or native). + word = support::endian::read16(&Bytes[index], E); + return true; +} + +bool MCDisassembler::getDoubleWord(support::endianness E, + ArrayRef Bytes, size_t index, + uint32_t &doubleWord) const { + // Compare against the remaining size so a huge index cannot wrap the check. + if (index > Bytes.size() || Bytes.size() - index < 4) + return false; + // Decode in the byte order requested by E (little, big or native). + doubleWord = support::endian::read32(&Bytes[index], E); + return true; +} + +bool MCDisassembler::getQuadWord(support::endianness E, ArrayRef Bytes, + size_t index, uint64_t &quadWord) const { + // Compare against the remaining size so a huge index cannot wrap the check. + if (index > Bytes.size() || Bytes.size() - index < 8) + return false; + // Decode in the byte order requested by E (little, big or native). + quadWord = support::endian::read64(&Bytes[index], E); + return true; } bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp --- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -46,7 +46,7 @@ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; - DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, + DecodeStatus onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; @@ -121,28 +121,28 @@ } MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( - StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, - raw_ostream &CStream) const { + SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, + uint64_t Address, raw_ostream &CStream) const { Size = 0; if (Address == 0) { // Start of a code section: we're parsing only the function count. 
int64_t FunctionCount; if (!nextLEB(FunctionCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return MCDisassembler::Ignore; outs() << " # " << FunctionCount << " functions in section."; } else { // Parse the start of a single function. int64_t BodySize, LocalEntryCount; if (!nextLEB(BodySize, Bytes, Size, false) || !nextLEB(LocalEntryCount, Bytes, Size, false)) - return MCDisassembler::Fail; + return MCDisassembler::Ignore; if (LocalEntryCount) { outs() << " .local "; for (int64_t I = 0; I < LocalEntryCount; I++) { int64_t Count, Type; if (!nextLEB(Count, Bytes, Size, false) || !nextLEB(Type, Bytes, Size, false)) - return MCDisassembler::Fail; + return MCDisassembler::Ignore; for (int64_t J = 0; J < Count; J++) { if (I || J) outs() << ", "; @@ -152,7 +152,7 @@ } } outs() << "\n"; - return MCDisassembler::Success; + return MCDisassembler::SoftFail; } MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1396,7 +1396,7 @@ if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) + // skip amd_kernel_code_t at the start of kernel symbol (256 bytes) Start += 256; } if (SI == SE - 1 || @@ -1405,8 +1405,8 @@ // cut up to 256 bytes const uint64_t EndAlign = 256; const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast(&Bytes[End - 4]) == 0) + while (End > Limit && *reinterpret_cast( + &Bytes[End - 4]) == 0) End -= 4; } } @@ -1427,10 +1427,23 @@ continue; } - // Some targets (like WebAssembly) have a special prelude at the start - // of each symbol. 
- DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); + MCDisassembler::DecodeStatus status = DisAsm->onSymbolStart( + Symbols[SI], Size, Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + + if (status == MCDisassembler::Success || status == MCDisassembler::Fail) { + if (status == MCDisassembler::Fail) + outs() << "Error in decoding " << SymbolName << "\n"; + continue; + } + + // Only fall back to normal flow if the symbol is decoded partially + // (SoftFail) or when the target ignores the symbol for special treatment. + // Right now: + // - WebAssembly decodes preludes (return SoftFail / Ignore) + // - AMDGPU needs it to decode kernel descriptors + // (return Success / Fail / Ignore) + Start += Size; Index = Start;