Skip to content

Commit 16c1682

Browse files
committedMay 10, 2018
[WebAssembly] Initial Disassembler.
This implements a new table-gen emitter to create tables for a wasm disassembler, and a disassembler to use them. Comes with 2 tests that test a few instructions manually. Is also able to disassemble large .wasm files with objdump reasonably. Not working so well, to be addressed in followups: (1) objdump appears to be passing an incorrect starting point; (2) since the disassembler works an instruction at a time, and it is disassembling stack instructions, it has no idea of pseudo register assignments. These registers are required for the instruction printing code that follows. For now, all such registers appear in the output as $0. Patch by Wouter van Oortmerssen Differential Revision: https://reviews.llvm.org/D45848 llvm-svn: 332052
1 parent 65e9f1f commit 16c1682

12 files changed

+375
-15
lines changed
 

‎llvm/lib/Target/WebAssembly/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS WebAssembly.td)
33
tablegen(LLVM WebAssemblyGenAsmMatcher.inc -gen-asm-matcher)
44
tablegen(LLVM WebAssemblyGenAsmWriter.inc -gen-asm-writer)
55
tablegen(LLVM WebAssemblyGenDAGISel.inc -gen-dag-isel)
6+
tablegen(LLVM WebAssemblyGenDisassemblerTables.inc -gen-disassembler)
67
tablegen(LLVM WebAssemblyGenFastISel.inc -gen-fast-isel)
78
tablegen(LLVM WebAssemblyGenInstrInfo.inc -gen-instr-info)
89
tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter)

‎llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp

+121-5
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,23 @@
1919
#include "WebAssembly.h"
2020
#include "llvm/MC/MCContext.h"
2121
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
22+
#include "llvm/MC/MCFixedLenDisassembler.h"
2223
#include "llvm/MC/MCInst.h"
2324
#include "llvm/MC/MCInstrInfo.h"
2425
#include "llvm/MC/MCSubtargetInfo.h"
2526
#include "llvm/MC/MCSymbol.h"
2627
#include "llvm/Support/Endian.h"
28+
#include "llvm/Support/LEB128.h"
2729
#include "llvm/Support/TargetRegistry.h"
30+
2831
using namespace llvm;
2932

3033
#define DEBUG_TYPE "wasm-disassembler"
3134

35+
using DecodeStatus = MCDisassembler::DecodeStatus;
36+
37+
#include "WebAssemblyGenDisassemblerTables.inc"
38+
3239
namespace {
3340
class WebAssemblyDisassembler final : public MCDisassembler {
3441
std::unique_ptr<const MCInstrInfo> MCII;
@@ -60,11 +67,120 @@ extern "C" void LLVMInitializeWebAssemblyDisassembler() {
6067
createWebAssemblyDisassembler);
6168
}
6269

63-
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
64-
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
65-
raw_ostream &OS, raw_ostream &CS) const {
70+
/// Fetch the byte at offset \p Size of \p Bytes and step \p Size past it.
///
/// \returns the byte value (0..255), or -1 when \p Size is already at or past
/// the end of \p Bytes; in that case \p Size is left unchanged.
static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
  const bool HasByte = Size < Bytes.size();
  return HasByte ? static_cast<int>(Bytes[Size++]) : -1;
}
6677

67-
// TODO: Implement disassembly.
78+
/// Decode one LEB128-encoded immediate that starts at offset \p Size within
/// \p Bytes and append it to \p MI as an immediate operand.
///
/// \param Signed selects SLEB128 decoding; otherwise the value is decoded as
///        ULEB128 and converted to int64_t.
/// \returns false, leaving \p Size and \p MI untouched, when the decoder
///          reports an error; otherwise advances \p Size by the number of
///          encoded bytes and returns true.
static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
                              ArrayRef<uint8_t> Bytes, bool Signed) {
  const uint8_t *const Begin = Bytes.data() + Size;
  const uint8_t *const End = Bytes.data() + Bytes.size();
  unsigned Length = 0;
  const char *Error = nullptr;

  int64_t Value;
  if (Signed)
    Value = decodeSLEB128(Begin, &Length, End, &Error);
  else
    Value = static_cast<int64_t>(decodeULEB128(Begin, &Length, End, &Error));

  if (Error)
    return false;

  Size += Length;
  MI.addOperand(MCOperand::createImm(Value));
  return true;
}
93+
94+
template <typename T>
95+
bool parseFPImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
96+
if (Size + sizeof(T) > Bytes.size())
97+
return false;
98+
T Val;
99+
memcpy(&Val, Bytes.data() + Size, sizeof(T));
100+
support::endian::byte_swap<T, support::endianness::little>(Val);
101+
Size += sizeof(T);
102+
MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
103+
return true;
104+
}
68105

69-
return MCDisassembler::Fail;
106+
/// Decode a single WebAssembly instruction from the front of \p Bytes into
/// \p MI.
///
/// \p Size is reset to zero on entry and advanced past every byte that gets
/// consumed (opcode byte(s) and encoded operands). \p CS is recorded as the
/// comment stream. Returns Fail when the input runs out, when the opcode maps
/// to an unused table slot or an unknown prefix, or when an operand cannot be
/// parsed; returns Success otherwise.
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
    MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
    raw_ostream & /*OS*/, raw_ostream &CS) const {
  CommentStream = &CS;
  Size = 0;

  int Byte = nextByte(Bytes, Size);
  if (Byte < 0)
    return MCDisassembler::Fail;

  const auto *Entry = &InstructionTable0[Byte];
  if (Entry->ET == ET_Prefix) {
    // A prefix byte redirects to a secondary table. Scan the
    // nullptr-terminated prefix list linearly; so far it only has 2 entries.
    const auto *PrefixEntry = PrefixTable;
    while (PrefixEntry->Table && PrefixEntry->Prefix != Byte)
      ++PrefixEntry;
    if (!PrefixEntry->Table)
      return MCDisassembler::Fail;
    // The byte after the prefix indexes into the secondary table.
    Byte = nextByte(Bytes, Size);
    if (Byte < 0)
      return MCDisassembler::Fail;
    Entry = PrefixEntry->Table + Byte;
  }

  if (Entry->ET == ET_Unused)
    return MCDisassembler::Fail;
  // Anything that is neither unused nor a prefix has to be an instruction.
  assert(Entry->ET == ET_Instruction);
  MI.setOpcode(Entry->Opcode);

  // Walk the operand descriptors and pull each operand out of the stream.
  for (unsigned Idx = 0, NumOps = Entry->NumOperands; Idx != NumOps; ++Idx) {
    bool Parsed = true;
    switch (Entry->Operands[Idx]) {
    // Operands encoded as ULEB128.
    case WebAssembly::OPERAND_BASIC_BLOCK:
    case WebAssembly::OPERAND_LOCAL:
    case WebAssembly::OPERAND_GLOBAL:
    case WebAssembly::OPERAND_FUNCTION32:
    case WebAssembly::OPERAND_OFFSET32:
    case WebAssembly::OPERAND_P2ALIGN:
    case WebAssembly::OPERAND_TYPEINDEX:
    case MCOI::OPERAND_IMMEDIATE:
      Parsed = parseLEBImmediate(MI, Size, Bytes, false);
      break;
    // Operands encoded as SLEB128.
    case WebAssembly::OPERAND_I32IMM:
    case WebAssembly::OPERAND_I64IMM:
    case WebAssembly::OPERAND_SIGNATURE:
      Parsed = parseLEBImmediate(MI, Size, Bytes, true);
      break;
    // Floating point operands.
    case WebAssembly::OPERAND_F32IMM:
      Parsed = parseFPImmediate<float>(MI, Size, Bytes);
      break;
    case WebAssembly::OPERAND_F64IMM:
      Parsed = parseFPImmediate<double>(MI, Size, Bytes);
      break;
    case MCOI::OPERAND_REGISTER:
      // Register operands are NOT present in the instruction stream, yet MC
      // expects an operand for each of them, so a placeholder register 0 is
      // added.
      // FIXME: can MC re-generate register assignments or do we have to
      // do this? Since this function decodes a single instruction, we don't
      // have the proper context for tracking an operand stack here.
      MI.addOperand(MCOperand::createReg(0));
      break;
    default:
      llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
    }
    if (!Parsed)
      return MCDisassembler::Fail;
  }
  return MCDisassembler::Success;
}

‎llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp

+5-9
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
4646

4747
void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
4848
StringRef Annot,
49-
const MCSubtargetInfo & /*STI*/) {
49+
const MCSubtargetInfo &STI) {
5050
// Print the instruction (this uses the AsmStrings from the .td files).
5151
printInstruction(MI, OS);
5252

@@ -194,20 +194,16 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
194194
}
195195
}
196196

197-
void
198-
WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
199-
unsigned OpNo,
200-
raw_ostream &O) {
197+
void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(
198+
const MCInst *MI, unsigned OpNo, raw_ostream &O) {
201199
int64_t Imm = MI->getOperand(OpNo).getImm();
202200
if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
203201
return;
204202
O << ":p2align=" << Imm;
205203
}
206204

207-
void
208-
WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
209-
unsigned OpNo,
210-
raw_ostream &O) {
205+
void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(
206+
const MCInst *MI, unsigned OpNo, raw_ostream &O) {
211207
int64_t Imm = MI->getOperand(OpNo).getImm();
212208
switch (WebAssembly::ExprType(Imm)) {
213209
case WebAssembly::ExprType::Void: break;

‎llvm/lib/Target/WebAssembly/WebAssembly.td

+8
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,15 @@ def WebAssemblyAsmParser : AsmParser {
8282
let ShouldEmitMatchRegisterName = 0;
8383
}
8484

85+
def WebAssemblyAsmWriter : AsmWriter {
86+
string AsmWriterClassName = "InstPrinter";
87+
int PassSubtarget = 0;
88+
int Variant = 0;
89+
bit isMCAsmWriter = 1;
90+
}
91+
8592
def WebAssembly : Target {
8693
let InstructionSet = WebAssemblyInstrInfo;
8794
let AssemblyParsers = [WebAssemblyAsmParser];
95+
let AssemblyWriters = [WebAssemblyAsmWriter];
8896
}

‎llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td

+4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
5757
}
5858
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
5959

60+
// This is technically a control-flow instruction, since all it affects is the
61+
// IP.
62+
def NOP : I<(outs), (ins), [], "nop", 0x01>;
63+
6064
// Placemarkers to indicate the start or end of a block or loop scope.
6165
// These use/clobber VALUE_STACK to prevent them from being moved into the
6266
// middle of an expression tree.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Flag this test directory as unsupported when 'WebAssembly' is not listed in
# config.root.targets.
if 'WebAssembly' not in config.root.targets:
    config.unsupported = True
3+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# RUN: llvm-mc --disassemble %s -triple=wasm32-unknown-unknown | FileCheck %s
2+
3+
# CHECK: .text
4+
5+
# CHECK: nop
6+
0x01
7+
8+
# CHECK: i32.add $0=, $0, $0
9+
# NOTE: registers are meaningless, as there is no context for what they are.
10+
0x6a
11+
12+
# CHECK: i64.const $0=, -1
13+
0x42 0x7F
14+
15+
# CHECK: i64.load32_u $0=, 16($0):p2align=1
16+
0x35 0x01 0x10
17+
18+
# CHECK: block
19+
# 3
20+
# FIXME: WebAssemblyInstPrinter does not currently print block number.
21+
0x02 0x03
22+
23+
# CHECK: call_indirect
24+
# $0=, 128, 0
25+
# FIXME: WebAssemblyInstPrinter does not print immediates.
26+
0x11 0x80 0x01 0x00
27+
28+
# CHECK: get_local $0=, 128
29+
0x20 0x80 0x01
30+
31+
# Prefix byte example:
32+
# CHECK: i64.trunc_u:sat/f64 $0=, $0
33+
0xFC 0x07

‎llvm/unittests/MC/Disassembler.cpp

+44-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ static const char *symbolLookupCallback(void *DisInfo, uint64_t ReferenceValue,
2121
return nullptr;
2222
}
2323

24-
TEST(Disassembler, Test1) {
24+
TEST(Disassembler, X86Test) {
2525
llvm::InitializeAllTargetInfos();
2626
llvm::InitializeAllTargetMCs();
2727
llvm::InitializeAllDisassemblers();
@@ -62,3 +62,46 @@ TEST(Disassembler, Test1) {
6262

6363
LLVMDisasmDispose(DCR);
6464
}
65+
66+
// Disassembles three back-to-back WebAssembly instructions through the C API
// and checks the byte count and the printed text of each one.
TEST(Disassembler, WebAssemblyTest) {
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllDisassemblers();

  uint8_t Bytes[] = {0x6a, 0x42, 0x7F, 0x35, 0x01, 0x10};
  const char OutStringSize = 100;
  char OutString[OutStringSize];
  LLVMDisasmContextRef DCR = LLVMCreateDisasm(
      "wasm32-unknown-unknown-elf", nullptr, 0, nullptr, symbolLookupCallback);
  // Nothing to check when no disassembler context could be created.
  if (!DCR)
    return;

  // Expected encoded size and printed form of each instruction, in stream
  // order.
  struct Expectation {
    size_t Size;
    const char *Text;
  };
  const Expectation Expected[] = {
      {1U, "\ti32.add \t$0=, $0, $0"},
      {2U, "\ti64.const\t$0=, -1"},
      {3U, "\ti64.load32_u\t$0=, 16($0):p2align=1"},
  };

  uint8_t *Cursor = Bytes;
  unsigned Remaining = sizeof(Bytes);
  unsigned PC = 0;
  size_t InstSize = 0;
  for (const Expectation &Want : Expected) {
    // Step over whatever the previous decode consumed (nothing on the first
    // iteration).
    PC += InstSize;
    Cursor += InstSize;
    Remaining -= InstSize;

    InstSize = LLVMDisasmInstruction(DCR, Cursor, Remaining, PC, OutString,
                                     OutStringSize);
    EXPECT_EQ(InstSize, Want.Size);
    EXPECT_EQ(StringRef(OutString), Want.Text);
  }

  LLVMDisasmDispose(DCR);
}

‎llvm/utils/TableGen/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ add_tablegen(llvm-tblgen LLVM
4444
X86FoldTablesEmitter.cpp
4545
X86ModRMFilters.cpp
4646
X86RecognizableInstr.cpp
47+
WebAssemblyDisassemblerEmitter.cpp
4748
CTagsEmitter.cpp
4849
)
4950
set_target_properties(llvm-tblgen PROPERTIES FOLDER "Tablegenning")

‎llvm/utils/TableGen/DisassemblerEmitter.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//===----------------------------------------------------------------------===//
99

1010
#include "CodeGenTarget.h"
11+
#include "WebAssemblyDisassemblerEmitter.h"
1112
#include "X86DisassemblerTables.h"
1213
#include "X86RecognizableInstr.h"
1314
#include "llvm/TableGen/Error.h"
@@ -125,6 +126,14 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
125126
return;
126127
}
127128

129+
// WebAssembly has variable length opcodes, so can't use EmitFixedLenDecoder
130+
// below (which depends on a Size table-gen Record), and also uses a custom
131+
// disassembler.
132+
if (Target.getName() == "WebAssembly") {
133+
emitWebAssemblyDisassemblerTables(OS, Target.getInstructionsByEnumValue());
134+
return;
135+
}
136+
128137
// ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
129138
if (Target.getName() == "ARM" || Target.getName() == "Thumb" ||
130139
Target.getName() == "AArch64" || Target.getName() == "ARM64") {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//===- WebAssemblyDisassemblerEmitter.cpp - Disassembler tables -*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file is part of the WebAssembly Disassembler Emitter.
11+
// It contains the implementation of the disassembler tables.
12+
// Documentation for the disassembler emitter in general can be found in
13+
// WebAssemblyDisassemblerEmitter.h.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "WebAssemblyDisassemblerEmitter.h"
18+
#include "llvm/TableGen/Record.h"
19+
20+
namespace llvm {
21+
22+
/// Emit the lookup tables used by the WebAssembly disassembler to \p OS.
///
/// Every instruction in \p NumberedInstructions that has an "Inst" encoding
/// is bucketed by its prefix byte (bits 8..15 of the encoding, 0 meaning the
/// primary table) and its opcode byte (bits 0..7). One 256-entry
/// InstructionTable<prefix> array is written per prefix, followed by a
/// nullptr-terminated PrefixTable that maps each non-zero prefix byte to its
/// table.
void emitWebAssemblyDisassemblerTables(
    raw_ostream &OS,
    const ArrayRef<const CodeGenInstruction *> &NumberedInstructions) {
  // (instruction enum value, instruction) stored per opcode byte, grouped by
  // prefix byte.
  using InstrEntry = std::pair<unsigned, const CodeGenInstruction *>;
  using ByteTable = std::map<unsigned, InstrEntry>;
  std::map<unsigned, ByteTable> OpcodeTable;

  // First organize all opcodes by (prefix) byte. Prefix 0 is the starting
  // table.
  for (unsigned Idx = 0, End = NumberedInstructions.size(); Idx != End;
       ++Idx) {
    auto &CGI = *NumberedInstructions[Idx];
    auto &Def = *CGI.TheDef;
    if (!Def.getValue("Inst"))
      continue;
    auto &Inst = *Def.getValueAsBitsInit("Inst");
    auto *Encoding =
        reinterpret_cast<IntInit *>(Inst.convertInitializerTo(IntRecTy::get()));
    unsigned Opc = static_cast<unsigned>(Encoding->getValue());
    if (Opc == 0xFFFFFFFF)
      continue; // No opcode defined.
    assert(Opc <= 0xFFFF);
    const unsigned Prefix = Opc >> 8;
    Opc &= 0xFF;
    auto &Slot = OpcodeTable[Prefix][Opc];
    // Make sure we store the variant with the least amount of operands,
    // which is the one without explicit registers. Only few instructions
    // have these currently, would be good to have for all of them.
    // FIXME: this picks the first of many typed variants, which is
    // currently the except_ref one, though this shouldn't matter for
    // disassembly purposes.
    const bool SlotEmpty = !Slot.second;
    if (SlotEmpty || Slot.second->Operands.OperandList.size() >
                         CGI.Operands.OperandList.size())
      Slot = std::make_pair(Idx, &CGI);
  }

  // Preamble: include, namespace, and the table entry type.
  OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n"
     << "\n"
     << "namespace llvm {\n\n"
     << "enum EntryType : uint8_t { "
     << "ET_Unused, ET_Prefix, ET_Instruction };\n\n"
     << "struct WebAssemblyInstruction {\n"
     << " uint16_t Opcode;\n"
     << " EntryType ET;\n"
     << " uint8_t NumOperands;\n"
     << " uint8_t Operands[4];\n"
     << "};\n\n";

  // Output one table per prefix.
  for (const auto &PrefixPair : OpcodeTable) {
    const auto &Table = PrefixPair.second;
    if (Table.empty())
      continue;
    OS << "WebAssemblyInstruction InstructionTable" << PrefixPair.first
       << "[] = {\n";
    for (unsigned Byte = 0; Byte <= 0xFF; ++Byte) {
      const auto Found = Table.find(Byte);
      if (Found == Table.end()) {
        // No instruction here. In the primary table, a non-zero byte that has
        // its own table is a prefix; everything else is unused.
        const bool IsPrefix = Byte != 0 && PrefixPair.first == 0 &&
                              OpcodeTable.find(Byte) != OpcodeTable.end();
        OS << (IsPrefix ? " { 0, ET_Prefix, 0, {}" : " { 0, ET_Unused, 0, {}");
      } else {
        // Regular instruction.
        assert(Found->second.second);
        const auto &CGI = *Found->second.second;
        OS << " // 0x";
        OS.write_hex(static_cast<unsigned long long>(Byte));
        OS << ": " << CGI.AsmString << "\n"
           << " { " << Found->second.first << ", ET_Instruction, "
           << CGI.Operands.OperandList.size() << ", {\n";
        for (const auto &Op : CGI.Operands.OperandList)
          OS << " " << Op.OperandType << ",\n";
        OS << " }\n";
      }
      OS << " },\n";
    }
    OS << "};\n\n";
  }

  // Create a table of all extension tables:
  OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n"
     << "PrefixTable[] = {\n";
  for (const auto &PrefixPair : OpcodeTable) {
    const bool IsExtension =
        !PrefixPair.second.empty() && PrefixPair.first != 0;
    if (IsExtension)
      OS << " { " << PrefixPair.first << ", InstructionTable"
         << PrefixPair.first << " },\n";
  }
  OS << " { 0, nullptr }\n};\n\n"
     << "} // End llvm namespace\n";
}
115+
116+
} // namespace llvm
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//===- WebAssemblyDisassemblerEmitter.h - Disassembler tables ---*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file is part of the WebAssembly Disassembler Emitter.
11+
// It contains the interface of the disassembler tables.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
16+
#define LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
17+
18+
#include "CodeGenInstruction.h"
19+
#include "llvm/ADT/ArrayRef.h"
20+
#include "llvm/Support/raw_ostream.h"
21+
22+
namespace llvm {
23+
24+
void emitWebAssemblyDisassemblerTables(
25+
raw_ostream &OS,
26+
const ArrayRef<const CodeGenInstruction *> &NumberedInstructions);
27+
28+
} // namespace llvm
29+
30+
#endif

0 commit comments

Comments
 (0)
Please sign in to comment.