Index: lib/Target/WebAssembly/CMakeLists.txt =================================================================== --- lib/Target/WebAssembly/CMakeLists.txt +++ lib/Target/WebAssembly/CMakeLists.txt @@ -3,6 +3,7 @@ tablegen(LLVM WebAssemblyGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM WebAssemblyGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM WebAssemblyGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM WebAssemblyGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM WebAssemblyGenFastISel.inc -gen-fast-isel) tablegen(LLVM WebAssemblyGenInstrInfo.inc -gen-instr-info) tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter) Index: lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp =================================================================== --- lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -19,16 +19,23 @@ #include "WebAssembly.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" + using namespace llvm; #define DEBUG_TYPE "wasm-disassembler" +using DecodeStatus = MCDisassembler::DecodeStatus; + +#include "WebAssemblyGenDisassemblerTables.inc" + namespace { class WebAssemblyDisassembler final : public MCDisassembler { std::unique_ptr MCII; @@ -60,11 +67,120 @@ createWebAssemblyDisassembler); } -MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( - MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t /*Address*/, - raw_ostream &OS, raw_ostream &CS) const { +static int nextByte(ArrayRef Bytes, uint64_t &Size) { + if (Size >= Bytes.size()) + return -1; + auto V = Bytes[Size]; + Size++; + return V; +} - // TODO: Implement disassembly. +static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, + ArrayRef Bytes, bool Signed) { + unsigned N = 0; + const char *Error = nullptr; + auto Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, + Bytes.data() + Bytes.size(), &Error) + : static_cast( + decodeULEB128(Bytes.data() + Size, &N, + Bytes.data() + Bytes.size(), &Error)); + if (Error) + return false; + Size += N; + MI.addOperand(MCOperand::createImm(Val)); + return true; +} + +template +bool parseFPImmediate(MCInst &MI, uint64_t &Size, ArrayRef Bytes) { + if (Size + sizeof(T) > Bytes.size()) + return false; + T Val; + memcpy(&Val, Bytes.data() + Size, sizeof(T)); + support::endian::byte_swap(Val); + Size += sizeof(T); + MI.addOperand(MCOperand::createFPImm(static_cast(Val))); + return true; +} - return MCDisassembler::Fail; +MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( + MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t /*Address*/, + raw_ostream & /*OS*/, raw_ostream &CS) const { + CommentStream = &CS; + Size = 0; + auto Opc = nextByte(Bytes, Size); + if (Opc < 0) + return MCDisassembler::Fail; + const auto *WasmInst = &InstructionTable0[Opc]; + // If this is a prefix byte, indirect to another table. + if (WasmInst->ET == ET_Prefix) { + WasmInst = nullptr; + // Linear search, so far only 2 entries. + for (auto PT = PrefixTable; PT->Table; PT++) { + if (PT->Prefix == Opc) { + WasmInst = PT->Table; + break; + } + } + if (!WasmInst) + return MCDisassembler::Fail; + Opc = nextByte(Bytes, Size); + if (Opc < 0) + return MCDisassembler::Fail; + WasmInst += Opc; + } + if (WasmInst->ET == ET_Unused) + return MCDisassembler::Fail; + // At this point we must have a valid instruction to decode. + assert(WasmInst->ET == ET_Instruction); + MI.setOpcode(WasmInst->Opcode); + // Parse any operands. + for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { + switch (WasmInst->Operands[OPI]) { + // ULEB operands: + case WebAssembly::OPERAND_BASIC_BLOCK: + case WebAssembly::OPERAND_LOCAL: + case WebAssembly::OPERAND_GLOBAL: + case WebAssembly::OPERAND_FUNCTION32: + case WebAssembly::OPERAND_OFFSET32: + case WebAssembly::OPERAND_P2ALIGN: + case WebAssembly::OPERAND_TYPEINDEX: + case MCOI::OPERAND_IMMEDIATE: { + if (!parseLEBImmediate(MI, Size, Bytes, false)) + return MCDisassembler::Fail; + break; + } + // SLEB operands: + case WebAssembly::OPERAND_I32IMM: + case WebAssembly::OPERAND_I64IMM: + case WebAssembly::OPERAND_SIGNATURE: { + if (!parseLEBImmediate(MI, Size, Bytes, true)) + return MCDisassembler::Fail; + break; + } + // FP operands. + case WebAssembly::OPERAND_F32IMM: { + if (!parseFPImmediate(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + case WebAssembly::OPERAND_F64IMM: { + if (!parseFPImmediate(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + case MCOI::OPERAND_REGISTER: { + // These are NOT actually in the instruction stream, but MC is going to + // expect operands to be present for them! + // FIXME: can MC re-generate register assignments or do we have to + // do this? Since this function decodes a single instruction, we don't + // have the proper context for tracking an operand stack here. + MI.addOperand(MCOperand::createReg(0)); + break; + } + default: + llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); + } + } + return MCDisassembler::Success; } Index: lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp =================================================================== --- lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -46,7 +46,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, - const MCSubtargetInfo & /*STI*/) { + const MCSubtargetInfo &STI) { // Print the instruction (this uses the AsmStrings from the .td files). printInstruction(MI, OS); @@ -194,20 +194,16 @@ } } -void -WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI, - unsigned OpNo, - raw_ostream &O) { +void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand( + const MCInst *MI, unsigned OpNo, raw_ostream &O) { int64_t Imm = MI->getOperand(OpNo).getImm(); if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode())) return; O << ":p2align=" << Imm; } -void -WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI, - unsigned OpNo, - raw_ostream &O) { +void WebAssemblyInstPrinter::printWebAssemblySignatureOperand( + const MCInst *MI, unsigned OpNo, raw_ostream &O) { int64_t Imm = MI->getOperand(OpNo).getImm(); switch (WebAssembly::ExprType(Imm)) { case WebAssembly::ExprType::Void: break; Index: lib/Target/WebAssembly/WebAssembly.td =================================================================== --- lib/Target/WebAssembly/WebAssembly.td +++ lib/Target/WebAssembly/WebAssembly.td @@ -82,7 +82,15 @@ let ShouldEmitMatchRegisterName = 0; } +def WebAssemblyAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 0; + int Variant = 0; + bit isMCAsmWriter = 1; +} + def WebAssembly : Target { let InstructionSet = WebAssemblyInstrInfo; let AssemblyParsers = [WebAssemblyAsmParser]; + let AssemblyWriters = [WebAssemblyAsmWriter]; } Index: lib/Target/WebAssembly/WebAssemblyInstrControl.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -57,6 +57,10 @@ } } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 +// This is technically a control-flow instruction, since all it affects is the +// IP. +def NOP : I<(outs), (ins), [], "nop", 0x01>; + // Placemarkers to indicate the start or end of a block or loop scope. // These use/clobber VALUE_STACK to prevent them from being moved into the // middle of an expression tree. Index: test/MC/Disassembler/WebAssembly/lit.local.cfg =================================================================== --- /dev/null +++ test/MC/Disassembler/WebAssembly/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'WebAssembly' in config.root.targets: + config.unsupported = True + Index: test/MC/Disassembler/WebAssembly/wasm.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/WebAssembly/wasm.txt @@ -0,0 +1,33 @@ +# RUN: llvm-mc --disassemble %s -triple=wasm32-unknown-unknown-elf | FileCheck %s + +# CHECK: .text + +# CHECK: nop +0x01 + +# CHECK: i32.add $0=, $0, $0 +# NOTE: registers are meaningless, as there is no context for what they are. +0x6a + +# CHECK: i64.const $0=, -1 +0x42 0x7F + +# CHECK: i64.load32_u $0=, 16($0):p2align=1 +0x35 0x01 0x10 + +# CHECK: block +# 3 +# FIXME: WebAssemblyInstPrinter does not currently print block number. +0x02 0x03 + +# CHECK: call_indirect +# $0=, 128, 0 +# FIXME: WebAssemblyInstPrinter does not print immediates. +0x11 0x80 0x01 0x00 + +# CHECK: get_local $0=, 128 +0x20 0x80 0x01 + +# Prefix byte example: +# CHECK: i64.trunc_u:sat/f64 $0=, $0 +0xFC 0x07 Index: unittests/MC/Disassembler.cpp =================================================================== --- unittests/MC/Disassembler.cpp +++ unittests/MC/Disassembler.cpp @@ -21,7 +21,7 @@ return nullptr; } -TEST(Disassembler, Test1) { +TEST(Disassembler, X86Test) { llvm::InitializeAllTargetInfos(); llvm::InitializeAllTargetMCs(); llvm::InitializeAllDisassemblers(); @@ -62,3 +62,46 @@ LLVMDisasmDispose(DCR); } + +TEST(Disassembler, WebAssemblyTest) { + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllDisassemblers(); + + uint8_t Bytes[] = {0x6a, 0x42, 0x7F, 0x35, 0x01, 0x10}; + uint8_t *BytesP = Bytes; + const char OutStringSize = 100; + char OutString[OutStringSize]; + LLVMDisasmContextRef DCR = LLVMCreateDisasm( + "wasm32-unknown-unknown-elf", nullptr, 0, nullptr, symbolLookupCallback); + if (!DCR) + return; + + size_t InstSize; + unsigned NumBytes = sizeof(Bytes); + unsigned PC = 0; + + InstSize = LLVMDisasmInstruction(DCR, BytesP, NumBytes, PC, OutString, + OutStringSize); + EXPECT_EQ(InstSize, 1U); + EXPECT_EQ(StringRef(OutString), "\ti32.add \t$0=, $0, $0"); + PC += InstSize; + BytesP += InstSize; + NumBytes -= InstSize; + + InstSize = LLVMDisasmInstruction(DCR, BytesP, NumBytes, PC, OutString, + OutStringSize); + EXPECT_EQ(InstSize, 2U); + EXPECT_EQ(StringRef(OutString), "\ti64.const\t$0=, -1"); + + PC += InstSize; + BytesP += InstSize; + NumBytes -= InstSize; + + InstSize = LLVMDisasmInstruction(DCR, BytesP, NumBytes, PC, OutString, + OutStringSize); + EXPECT_EQ(InstSize, 3U); + EXPECT_EQ(StringRef(OutString), "\ti64.load32_u\t$0=, 16($0):p2align=1"); + + LLVMDisasmDispose(DCR); +} Index: utils/TableGen/CMakeLists.txt =================================================================== --- utils/TableGen/CMakeLists.txt +++ utils/TableGen/CMakeLists.txt @@ -43,6 +43,7 @@ X86FoldTablesEmitter.cpp X86ModRMFilters.cpp X86RecognizableInstr.cpp + WebAssemblyDisassemblerEmitter.cpp CTagsEmitter.cpp ) set_target_properties(llvm-tblgen PROPERTIES FOLDER "Tablegenning") Index: utils/TableGen/DisassemblerEmitter.cpp =================================================================== --- utils/TableGen/DisassemblerEmitter.cpp +++ utils/TableGen/DisassemblerEmitter.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenTarget.h" +#include "WebAssemblyDisassemblerEmitter.h" #include "X86DisassemblerTables.h" #include "X86RecognizableInstr.h" #include "llvm/TableGen/Error.h" @@ -125,6 +126,14 @@ return; } + // WebAssembly has variable length opcodes, so can't use EmitFixedLenDecoder + // below (which depends on a Size table-gen Record), and also uses a custom + // disassembler. + if (Target.getName() == "WebAssembly") { + emitWebAssemblyDisassemblerTables(OS, Target.getInstructionsByEnumValue()); + return; + } + // ARM and Thumb have a CHECK() macro to deal with DecodeStatuses. if (Target.getName() == "ARM" || Target.getName() == "Thumb" || Target.getName() == "AArch64" || Target.getName() == "ARM64") { Index: utils/TableGen/WebAssemblyDisassemblerEmitter.h =================================================================== --- /dev/null +++ utils/TableGen/WebAssemblyDisassemblerEmitter.h @@ -0,0 +1,30 @@ +//===- WebAssemblyDisassemblerEmitter.h - Disassembler tables ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the WebAssembly Disassembler Emitter. +// It contains the interface of the disassembler tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H +#define LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H + +#include "CodeGenInstruction.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +void emitWebAssemblyDisassemblerTables( + raw_ostream &OS, + const ArrayRef &NumberedInstructions); + +} // namespace llvm + +#endif Index: utils/TableGen/WebAssemblyDisassemblerEmitter.cpp =================================================================== --- /dev/null +++ utils/TableGen/WebAssemblyDisassemblerEmitter.cpp @@ -0,0 +1,116 @@ +//===- WebAssemblyDisassemblerEmitter.cpp - Disassembler tables -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the WebAssembly Disassembler Emitter. +// It contains the implementation of the disassembler tables. +// Documentation for the disassembler emitter in general can be found in +// WebAssemblyDisassemblerEmitter.h. +// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyDisassemblerEmitter.h" +#include "llvm/TableGen/Record.h" + +namespace llvm { + +void emitWebAssemblyDisassemblerTables( + raw_ostream &OS, + const ArrayRef &NumberedInstructions) { + // First lets organize all opcodes by (prefix) byte. Prefix 0 is the + // starting table. + std::map>> + OpcodeTable; + for (unsigned I = 0; I != NumberedInstructions.size(); ++I) { + auto &CGI = *NumberedInstructions[I]; + auto &Def = *CGI.TheDef; + if (!Def.getValue("Inst")) + continue; + auto &Inst = *Def.getValueAsBitsInit("Inst"); + auto Opc = static_cast( + reinterpret_cast(Inst.convertInitializerTo(IntRecTy::get())) + ->getValue()); + if (Opc == 0xFFFFFFFF) + continue; // No opcode defined. + assert(Opc <= 0xFFFF); + auto Prefix = Opc >> 8; + Opc = Opc & 0xFF; + auto &CGIP = OpcodeTable[Prefix][Opc]; + if (!CGIP.second || + // Make sure we store the variant with the least amount of operands, + // which is the one without explicit registers. Only few instructions + // have these currently, would be good to have for all of them. + // FIXME: this picks the first of many typed variants, which is + // currently the except_ref one, though this shouldn't matter for + // disassembly purposes. + CGIP.second->Operands.OperandList.size() > + CGI.Operands.OperandList.size()) { + CGIP = std::make_pair(I, &CGI); + } + } + OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n"; + OS << "\n"; + OS << "namespace llvm {\n\n"; + OS << "enum EntryType : uint8_t { "; + OS << "ET_Unused, ET_Prefix, ET_Instruction };\n\n"; + OS << "struct WebAssemblyInstruction {\n"; + OS << " uint16_t Opcode;\n"; + OS << " EntryType ET;\n"; + OS << " uint8_t NumOperands;\n"; + OS << " uint8_t Operands[4];\n"; + OS << "};\n\n"; + // Output one table per prefix. + for (auto &PrefixPair : OpcodeTable) { + if (PrefixPair.second.empty()) + continue; + OS << "WebAssemblyInstruction InstructionTable" << PrefixPair.first; + OS << "[] = {\n"; + for (unsigned I = 0; I <= 0xFF; I++) { + auto InstIt = PrefixPair.second.find(I); + if (InstIt != PrefixPair.second.end()) { + // Regular instruction. + assert(InstIt->second.second); + auto &CGI = *InstIt->second.second; + OS << " // 0x"; + OS.write_hex(static_cast(I)); + OS << ": " << CGI.AsmString << "\n"; + OS << " { " << InstIt->second.first << ", ET_Instruction, "; + OS << CGI.Operands.OperandList.size() << ", {\n"; + for (auto &Op : CGI.Operands.OperandList) { + OS << " " << Op.OperandType << ",\n"; + } + OS << " }\n"; + } else { + auto PrefixIt = OpcodeTable.find(I); + // If we have a non-empty table for it that's not 0, this is a prefix. + if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) { + OS << " { 0, ET_Prefix, 0, {}"; + } else { + OS << " { 0, ET_Unused, 0, {}"; + } + } + OS << " },\n"; + } + OS << "};\n\n"; + } + // Create a table of all extension tables: + OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n"; + OS << "PrefixTable[] = {\n"; + for (auto &PrefixPair : OpcodeTable) { + if (PrefixPair.second.empty() || !PrefixPair.first) + continue; + OS << " { " << PrefixPair.first << ", InstructionTable" + << PrefixPair.first; + OS << " },\n"; + } + OS << " { 0, nullptr }\n};\n\n"; + OS << "} // End llvm namespace\n"; +} + +} // namespace llvm