Index: include/llvm/MC/MCDisassembler/MCDisassembler.h =================================================================== --- include/llvm/MC/MCDisassembler/MCDisassembler.h +++ include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -17,6 +17,7 @@ namespace llvm { template class ArrayRef; +class StringRef; class MCContext; class MCInst; class MCSubtargetInfo; @@ -80,6 +81,23 @@ raw_ostream &VStream, raw_ostream &CStream) const = 0; + /// May parse any prelude that precedes instructions after the start of a + /// symbol. Needed for some targets, e.g. WebAssembly. + /// + /// \param Name - The name of the symbol. + /// \param Size - Set to the number of prelude bytes consumed (0 if none). + /// \param Address - The address, in the memory space of region, of the first + /// byte of the symbol. + /// \param Bytes - A reference to the bytes at the start of the symbol. + /// \param VStream - The stream to print warnings and diagnostic messages on. + /// \param CStream - The stream to print comments and annotations on. + /// \return - MCDisassembler::Success if the bytes are valid, + /// MCDisassembler::Fail if the bytes were invalid. 
+ virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const; + private: MCContext &Ctx; Index: include/llvm/Object/Wasm.h =================================================================== --- include/llvm/Object/Wasm.h +++ include/llvm/Object/Wasm.h @@ -222,6 +222,7 @@ bool isValidDataSymbol(uint32_t Index) const; bool isValidSectionSymbol(uint32_t Index) const; wasm::WasmFunction &getDefinedFunction(uint32_t Index); + const wasm::WasmFunction &getDefinedFunction(uint32_t Index) const; wasm::WasmGlobal &getDefinedGlobal(uint32_t Index); wasm::WasmEvent &getDefinedEvent(uint32_t Index); Index: lib/MC/MCDisassembler/MCDisassembler.cpp =================================================================== --- lib/MC/MCDisassembler/MCDisassembler.cpp +++ lib/MC/MCDisassembler/MCDisassembler.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include @@ -15,6 +17,13 @@ MCDisassembler::~MCDisassembler() = default; +MCDisassembler::DecodeStatus MCDisassembler::onSymbolStart( + StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream) const { + Size = 0; + return MCDisassembler::Success; +} + bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, Index: lib/MC/MCParser/WasmAsmParser.cpp =================================================================== --- lib/MC/MCParser/WasmAsmParser.cpp +++ lib/MC/MCParser/WasmAsmParser.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCStreamer.h" #include 
"llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolWasm.h" @@ -83,8 +84,16 @@ } bool parseSectionDirective(StringRef, SMLoc) { - // FIXME: .section currently no-op. - while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex(); + StringRef Name; + if (Parser->parseIdentifier(Name)) + return TokError("expected identifier in directive"); + // FIXME: Only the fixed form <name>, <string>, @ is accepted for now. + if (Expect(AsmToken::Comma, ",") || Expect(AsmToken::String, "string") || + Expect(AsmToken::Comma, ",") || Expect(AsmToken::At, "@") || + Expect(AsmToken::EndOfStatement, "eol")) + return true; + auto WS = getContext().getWasmSection(Name, SectionKind::getText()); + getStreamer().SwitchSection(WS); return false; } @@ -95,15 +104,13 @@ if (Parser->parseIdentifier(Name)) return TokError("expected identifier in directive"); auto Sym = getContext().getOrCreateSymbol(Name); - if (Lexer->isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); + if (Expect(AsmToken::Comma, ",")) + return true; const MCExpr *Expr; if (Parser->parseExpression(Expr)) return true; - if (Lexer->isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - Lex(); + if (Expect(AsmToken::EndOfStatement, "eol")) + return true; // MCWasmStreamer implements this. 
getStreamer().emitELFSize(Sym, Expr); return false; Index: lib/Object/WasmObjectFile.cpp =================================================================== --- lib/Object/WasmObjectFile.cpp +++ lib/Object/WasmObjectFile.cpp @@ -1010,6 +1010,12 @@ return Functions[Index - NumImportedFunctions]; } +const wasm::WasmFunction & +WasmObjectFile::getDefinedFunction(uint32_t Index) const { + assert(isDefinedFunctionIndex(Index)); + return Functions[Index - NumImportedFunctions]; +} + wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) { assert(isDefinedGlobalIndex(Index)); return Globals[Index - NumImportedGlobals]; @@ -1176,7 +1182,12 @@ } Expected WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { - return getSymbolValue(Symb); + auto &Sym = getWasmSymbol(Symb); + if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION && + isDefinedFunctionIndex(Sym.Info.ElementIndex)) + return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset; + else + return getSymbolValue(Symb); } uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const { Index: lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp =================================================================== --- lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -298,6 +298,8 @@ Type == "i32x4" || Type == "i64x2" || Type == "f32x4" || Type == "f64x2") return wasm::ValType::V128; + if (Type == "except_ref") + return wasm::ValType::EXCEPT_REF; return Optional(); } @@ -317,7 +319,7 @@ while (Lexer.is(AsmToken::Identifier)) { auto Type = parseType(Lexer.getTok().getString()); if (!Type) - return true; + return error("unknown type: ", Lexer.getTok()); Types.push_back(Type.getValue()); Parser.Lex(); if (!isNext(AsmToken::Comma)) @@ -561,6 +563,7 @@ auto &Out = getStreamer(); auto &TOut = reinterpret_cast(*Out.getTargetStreamer()); + auto &Ctx = Out.getContext(); // TODO: any time we return an error, at least one 
token must have been // consumed, otherwise this will not signal an error to the caller. @@ -578,8 +581,7 @@ if (!Type) return error("Unknown type in .globaltype directive: ", TypeTok); // Now set this symbol with the correct type. - auto WasmSym = cast( - TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); + auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType( wasm::WasmGlobalType{uint8_t(Type.getValue()), true}); @@ -597,8 +599,7 @@ auto SymName = expectIdent(); if (SymName.empty()) return true; - auto WasmSym = cast( - TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); + auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); if (CurrentState == Label && WasmSym == LastLabel) { // This .functype indicates a start of a function. if (ensureEmptyNestingStack()) @@ -621,8 +622,7 @@ auto SymName = expectIdent(); if (SymName.empty()) return true; - auto WasmSym = cast( - TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); + auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); auto Signature = make_unique(); if (parseRegTypeList(Signature->Params)) return true; Index: lib/Target/WebAssembly/Disassembler/LLVMBuild.txt =================================================================== --- lib/Target/WebAssembly/Disassembler/LLVMBuild.txt +++ lib/Target/WebAssembly/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = WebAssemblyDisassembler parent = WebAssembly -required_libraries = MCDisassembler WebAssemblyInfo Support +required_libraries = MCDisassembler WebAssemblyInfo WebAssemblyAsmPrinter Support add_to_library_groups = WebAssembly Index: lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp =================================================================== --- lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -15,6 +15,7 @@ /// 
//===----------------------------------------------------------------------===// +#include "InstPrinter/WebAssemblyInstPrinter.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" @@ -45,6 +46,10 @@ ArrayRef Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; + DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; public: WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, @@ -77,7 +82,7 @@ } static bool nextLEB(int64_t &Val, ArrayRef Bytes, uint64_t &Size, - bool Signed = false) { + bool Signed) { unsigned N = 0; const char *Error = nullptr; Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, @@ -116,6 +121,41 @@ return true; } +MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( + StringRef Name, uint64_t &Size, ArrayRef Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream) const { + Size = 0; + if (Address == 0) { + // Address 0 is the code section start: parse only the function count. + int64_t FunctionCount; + if (!nextLEB(FunctionCount, Bytes, Size, false)) + return MCDisassembler::Fail; + outs() << " # " << FunctionCount << " functions in section."; + } else { + // Parse a function header: body size, then (count, type) local entries. 
+ int64_t BodySize, LocalEntryCount; + if (!nextLEB(BodySize, Bytes, Size, false) || + !nextLEB(LocalEntryCount, Bytes, Size, false)) + return MCDisassembler::Fail; + if (LocalEntryCount) { + outs() << " .local "; + for (int64_t I = 0; I < LocalEntryCount; I++) { + int64_t Count, Type; + if (!nextLEB(Count, Bytes, Size, false) || + !nextLEB(Type, Bytes, Size, false)) + return MCDisassembler::Fail; + for (int64_t J = 0; J < Count; J++) { + if (I || J) + outs() << ", "; + outs() << WebAssembly::anyTypeToString(Type); + } + } + } + } + outs() << "\n"; + return MCDisassembler::Success; +} + MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t /*Address*/, raw_ostream & /*OS*/, raw_ostream &CS) const { @@ -138,7 +178,7 @@ if (!WasmInst) return MCDisassembler::Fail; int64_t PrefixedOpc; - if (!nextLEB(PrefixedOpc, Bytes, Size)) + if (!nextLEB(PrefixedOpc, Bytes, Size, false)) return MCDisassembler::Fail; if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) return MCDisassembler::Fail; Index: test/MC/WebAssembly/objdump.s =================================================================== --- /dev/null +++ test/MC/WebAssembly/objdump.s @@ -0,0 +1,34 @@ +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o %t.o -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s +# RUN: llvm-objdump -triple=wasm32-unknown-unknown -disassemble %t.o | FileCheck %s + + .section .text.main1,"",@ + .type test0,@function +test0: + .functype test0 (i32, i64) -> (i32) + .local f32, f64, v128, v128 + local.get 2 + end_function +.Lfunc_end0: + .size test0, .Lfunc_end0-test0 + + .section .text.main2,"",@ + .type test1,@function +test1: + .functype test1 (i32, i64) -> (i32) + .local i32, i64, except_ref + local.get 3 + end_function +.Lfunc_end1: + .size test1, .Lfunc_end1-test1 + + +# CHECK-LABEL: CODE: +# CHECK: # 2 functions in section. 
+# CHECK-LABEL: test0: +# CHECK-NEXT: .local f32, f64, v128, v128 +# CHECK-NEXT: 9: 20 02 local.get 2 +# CHECK-NEXT: b: 0b end_block +# CHECK-LABEL: test1: +# CHECK-NEXT: .local i32, i64, except_ref +# CHECK-NEXT: 14: 20 03 local.get 3 +# CHECK-NEXT: 16: 0b end_block Index: test/MC/WebAssembly/weak-alias.ll =================================================================== --- test/MC/WebAssembly/weak-alias.ll +++ test/MC/WebAssembly/weak-alias.ll @@ -210,13 +210,13 @@ ; CHECK-NEXT: ... ; CHECK-SYMS: SYMBOL TABLE: -; CHECK-SYMS-NEXT: 00000000 g F CODE .hidden foo -; CHECK-SYMS-NEXT: 00000001 g F CODE .hidden call_direct -; CHECK-SYMS-NEXT: 00000002 g F CODE .hidden call_alias -; CHECK-SYMS-NEXT: 00000000 gw F CODE .hidden foo_alias -; CHECK-SYMS-NEXT: 00000003 g F CODE .hidden call_direct_ptr +; CHECK-SYMS-NEXT: 00000001 g F CODE .hidden foo +; CHECK-SYMS-NEXT: 00000006 g F CODE .hidden call_direct +; CHECK-SYMS-NEXT: 0000000f g F CODE .hidden call_alias +; CHECK-SYMS-NEXT: 00000001 gw F CODE .hidden foo_alias +; CHECK-SYMS-NEXT: 00000018 g F CODE .hidden call_direct_ptr ; CHECK-SYMS-NEXT: 00000008 g O DATA direct_address -; CHECK-SYMS-NEXT: 00000004 g F CODE .hidden call_alias_ptr +; CHECK-SYMS-NEXT: 0000002b g F CODE .hidden call_alias_ptr ; CHECK-SYMS-NEXT: 00000010 g O DATA alias_address ; CHECK-SYMS-NEXT: 00000000 g O DATA bar ; CHECK-SYMS-NEXT: 00000000 gw O DATA .hidden bar_alias Index: test/tools/llvm-objdump/WebAssembly/symbol-table.test =================================================================== --- test/tools/llvm-objdump/WebAssembly/symbol-table.test +++ test/tools/llvm-objdump/WebAssembly/symbol-table.test @@ -1,9 +1,9 @@ RUN: llvm-objdump -t %p/../Inputs/trivial.obj.wasm | FileCheck %s CHECK: SYMBOL TABLE: -CHECK-NEXT: 00000002 g F CODE main +CHECK-NEXT: 00000001 g F CODE main CHECK-NEXT: 00000000 l O DATA .L.str CHECK-NEXT: 00000000 g F *UND* puts -CHECK-NEXT: 00000003 l F CODE .LSomeOtherFunction_bitcast +CHECK-NEXT: 00000019 l F CODE 
.LSomeOtherFunction_bitcast CHECK-NEXT: 00000000 g F *UND* SomeOtherFunction CHECK-NEXT: 00000010 g O DATA var Index: tools/llvm-objdump/llvm-objdump.cpp =================================================================== --- tools/llvm-objdump/llvm-objdump.cpp +++ tools/llvm-objdump/llvm-objdump.cpp @@ -1646,6 +1646,12 @@ raw_ostream &DebugOut = nulls(); #endif + // Some targets (like WebAssembly) have a special prelude at the start + // of each symbol. Skip past it; the returned status is not checked. + DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start), + SectionAddr + Start, DebugOut, CommentStream); + Start += Size; + for (Index = Start; Index < End; Index += Size) { MCInst Inst;