Skip to content

Commit f3b762a

Browse files
committed Jan 17, 2019
[WebAssembly] Fixed objdump not parsing function headers.
Summary: objdump was interpreting the function header containing the locals declaration as instructions. To parse these without injecting target-specific code in objdump, MCDisassembler::onSymbolStart was added to be implemented by the WebAssembly implementation. WasmObjectFile now returns a code offset for the "address" of a symbol, rather than the index. This is also more in line with what other targets do. Also ensured that the AsmParser correctly puts each function in its own segment to enable this test case. Reviewers: sbc100, dschuff Subscribers: jgravelle-google, aheejin, sunfish, rupprecht, llvm-commits Differential Revision: https://reviews.llvm.org/D56684 llvm-svn: 351460
1 parent 194d00e commit f3b762a

File tree

12 files changed

+153
-27
lines changed

12 files changed

+153
-27
lines changed
 

‎llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h

+18
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
namespace llvm {
1818

1919
template <typename T> class ArrayRef;
20+
class StringRef;
2021
class MCContext;
2122
class MCInst;
2223
class MCSubtargetInfo;
@@ -80,6 +81,23 @@ class MCDisassembler {
8081
raw_ostream &VStream,
8182
raw_ostream &CStream) const = 0;
8283

84+
/// May parse any prelude that precedes instructions after the start of a
85+
/// symbol. Needed for some targets, e.g. WebAssembly.
86+
///
87+
/// \param Name - The name of the symbol.
88+
/// \param Size - The number of bytes consumed.
89+
/// \param Address - The address, in the memory space of region, of the first
90+
/// byte of the symbol.
91+
/// \param Bytes - A reference to the actual bytes at the symbol location.
92+
/// \param VStream - The stream to print warnings and diagnostic messages on.
93+
/// \param CStream - The stream to print comments and annotations on.
94+
/// \return - MCDisassembler::Success if the bytes are valid,
95+
/// MCDisassembler::Fail if the bytes were invalid.
96+
virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
97+
ArrayRef<uint8_t> Bytes, uint64_t Address,
98+
raw_ostream &VStream,
99+
raw_ostream &CStream) const;
100+
83101
private:
84102
MCContext &Ctx;
85103

‎llvm/include/llvm/Object/Wasm.h

+1
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ class WasmObjectFile : public ObjectFile {
222222
bool isValidDataSymbol(uint32_t Index) const;
223223
bool isValidSectionSymbol(uint32_t Index) const;
224224
wasm::WasmFunction &getDefinedFunction(uint32_t Index);
225+
const wasm::WasmFunction &getDefinedFunction(uint32_t Index) const;
225226
wasm::WasmGlobal &getDefinedGlobal(uint32_t Index);
226227
wasm::WasmEvent &getDefinedEvent(uint32_t Index);
227228

‎llvm/lib/MC/MCDisassembler/MCDisassembler.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,22 @@
88
//===----------------------------------------------------------------------===//
99

1010
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
11+
#include "llvm/ADT/ArrayRef.h"
12+
#include "llvm/ADT/StringRef.h"
1113
#include "llvm/Support/raw_ostream.h"
1214
#include <algorithm>
1315

1416
using namespace llvm;
1517

1618
MCDisassembler::~MCDisassembler() = default;
1719

20+
MCDisassembler::DecodeStatus MCDisassembler::onSymbolStart(
21+
StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
22+
raw_ostream &VStream, raw_ostream &CStream) const {
23+
Size = 0;
24+
return MCDisassembler::Success;
25+
}
26+
1827
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
1928
uint64_t Address, bool IsBranch,
2029
uint64_t Offset,

‎llvm/lib/MC/MCParser/WasmAsmParser.cpp

+15-8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/MC/MCParser/MCAsmLexer.h"
2323
#include "llvm/MC/MCParser/MCAsmParser.h"
2424
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
25+
#include "llvm/MC/MCSectionWasm.h"
2526
#include "llvm/MC/MCStreamer.h"
2627
#include "llvm/MC/MCSymbol.h"
2728
#include "llvm/MC/MCSymbolWasm.h"
@@ -83,8 +84,16 @@ class WasmAsmParser : public MCAsmParserExtension {
8384
}
8485

8586
bool parseSectionDirective(StringRef, SMLoc) {
86-
// FIXME: .section currently no-op.
87-
while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex();
87+
StringRef Name;
88+
if (Parser->parseIdentifier(Name))
89+
return TokError("expected identifier in directive");
90+
// FIXME: currently requiring this very fixed format.
91+
if (Expect(AsmToken::Comma, ",") || Expect(AsmToken::String, "string") ||
92+
Expect(AsmToken::Comma, ",") || Expect(AsmToken::At, "@") ||
93+
Expect(AsmToken::EndOfStatement, "eol"))
94+
return true;
95+
auto WS = getContext().getWasmSection(Name, SectionKind::getText());
96+
getStreamer().SwitchSection(WS);
8897
return false;
8998
}
9099

@@ -95,15 +104,13 @@ class WasmAsmParser : public MCAsmParserExtension {
95104
if (Parser->parseIdentifier(Name))
96105
return TokError("expected identifier in directive");
97106
auto Sym = getContext().getOrCreateSymbol(Name);
98-
if (Lexer->isNot(AsmToken::Comma))
99-
return TokError("unexpected token in directive");
100-
Lex();
107+
if (Expect(AsmToken::Comma, ","))
108+
return true;
101109
const MCExpr *Expr;
102110
if (Parser->parseExpression(Expr))
103111
return true;
104-
if (Lexer->isNot(AsmToken::EndOfStatement))
105-
return TokError("unexpected token in directive");
106-
Lex();
112+
if (Expect(AsmToken::EndOfStatement, "eol"))
113+
return true;
107114
// MCWasmStreamer implements this.
108115
getStreamer().emitELFSize(Sym, Expr);
109116
return false;

‎llvm/lib/Object/WasmObjectFile.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,12 @@ wasm::WasmFunction &WasmObjectFile::getDefinedFunction(uint32_t Index) {
10551055
return Functions[Index - NumImportedFunctions];
10561056
}
10571057

1058+
const wasm::WasmFunction &
1059+
WasmObjectFile::getDefinedFunction(uint32_t Index) const {
1060+
assert(isDefinedFunctionIndex(Index));
1061+
return Functions[Index - NumImportedFunctions];
1062+
}
1063+
10581064
wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) {
10591065
assert(isDefinedGlobalIndex(Index));
10601066
return Globals[Index - NumImportedGlobals];
@@ -1221,7 +1227,12 @@ Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
12211227
}
12221228

12231229
Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
1224-
return getSymbolValue(Symb);
1230+
auto &Sym = getWasmSymbol(Symb);
1231+
if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION &&
1232+
isDefinedFunctionIndex(Sym.Info.ElementIndex))
1233+
return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset;
1234+
else
1235+
return getSymbolValue(Symb);
12251236
}
12261237

12271238
uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {

‎llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
298298
Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
299299
Type == "f64x2")
300300
return wasm::ValType::V128;
301+
if (Type == "except_ref")
302+
return wasm::ValType::EXCEPT_REF;
301303
return Optional<wasm::ValType>();
302304
}
303305

@@ -317,7 +319,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
317319
while (Lexer.is(AsmToken::Identifier)) {
318320
auto Type = parseType(Lexer.getTok().getString());
319321
if (!Type)
320-
return true;
322+
return error("unknown type: ", Lexer.getTok());
321323
Types.push_back(Type.getValue());
322324
Parser.Lex();
323325
if (!isNext(AsmToken::Comma))
@@ -561,6 +563,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
561563
auto &Out = getStreamer();
562564
auto &TOut =
563565
reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
566+
auto &Ctx = Out.getContext();
564567

565568
// TODO: any time we return an error, at least one token must have been
566569
// consumed, otherwise this will not signal an error to the caller.
@@ -578,8 +581,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
578581
if (!Type)
579582
return error("Unknown type in .globaltype directive: ", TypeTok);
580583
// Now set this symbol with the correct type.
581-
auto WasmSym = cast<MCSymbolWasm>(
582-
TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
584+
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
583585
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
584586
WasmSym->setGlobalType(
585587
wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
@@ -597,8 +599,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
597599
auto SymName = expectIdent();
598600
if (SymName.empty())
599601
return true;
600-
auto WasmSym = cast<MCSymbolWasm>(
601-
TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
602+
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
602603
if (CurrentState == Label && WasmSym == LastLabel) {
603604
// This .functype indicates a start of a function.
604605
if (ensureEmptyNestingStack())
@@ -621,8 +622,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
621622
auto SymName = expectIdent();
622623
if (SymName.empty())
623624
return true;
624-
auto WasmSym = cast<MCSymbolWasm>(
625-
TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
625+
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
626626
auto Signature = make_unique<wasm::WasmSignature>();
627627
if (parseRegTypeList(Signature->Params))
628628
return true;

‎llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@
1919
type = Library
2020
name = WebAssemblyDisassembler
2121
parent = WebAssembly
22-
required_libraries = MCDisassembler WebAssemblyInfo Support
22+
required_libraries = MCDisassembler WebAssemblyInfo WebAssemblyAsmPrinter Support
2323
add_to_library_groups = WebAssembly

‎llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp

+42-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
///
1616
//===----------------------------------------------------------------------===//
1717

18+
#include "InstPrinter/WebAssemblyInstPrinter.h"
1819
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
1920
#include "llvm/MC/MCContext.h"
2021
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -45,6 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
4546
ArrayRef<uint8_t> Bytes, uint64_t Address,
4647
raw_ostream &VStream,
4748
raw_ostream &CStream) const override;
49+
DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
50+
ArrayRef<uint8_t> Bytes, uint64_t Address,
51+
raw_ostream &VStream,
52+
raw_ostream &CStream) const override;
4853

4954
public:
5055
WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -77,7 +82,7 @@ static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
7782
}
7883

7984
static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
80-
bool Signed = false) {
85+
bool Signed) {
8186
unsigned N = 0;
8287
const char *Error = nullptr;
8388
Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
@@ -116,6 +121,41 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
116121
return true;
117122
}
118123

124+
MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
125+
StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
126+
raw_ostream &VStream, raw_ostream &CStream) const {
127+
Size = 0;
128+
if (Address == 0) {
129+
// Start of a code section: we're parsing only the function count.
130+
int64_t FunctionCount;
131+
if (!nextLEB(FunctionCount, Bytes, Size, false))
132+
return MCDisassembler::Fail;
133+
outs() << " # " << FunctionCount << " functions in section.";
134+
} else {
135+
// Parse the start of a single function.
136+
int64_t BodySize, LocalEntryCount;
137+
if (!nextLEB(BodySize, Bytes, Size, false) ||
138+
!nextLEB(LocalEntryCount, Bytes, Size, false))
139+
return MCDisassembler::Fail;
140+
if (LocalEntryCount) {
141+
outs() << " .local ";
142+
for (int64_t I = 0; I < LocalEntryCount; I++) {
143+
int64_t Count, Type;
144+
if (!nextLEB(Count, Bytes, Size, false) ||
145+
!nextLEB(Type, Bytes, Size, false))
146+
return MCDisassembler::Fail;
147+
for (int64_t J = 0; J < Count; J++) {
148+
if (I || J)
149+
outs() << ", ";
150+
outs() << WebAssembly::anyTypeToString(Type);
151+
}
152+
}
153+
}
154+
}
155+
outs() << "\n";
156+
return MCDisassembler::Success;
157+
}
158+
119159
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
120160
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
121161
raw_ostream & /*OS*/, raw_ostream &CS) const {
@@ -138,7 +178,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
138178
if (!WasmInst)
139179
return MCDisassembler::Fail;
140180
int64_t PrefixedOpc;
141-
if (!nextLEB(PrefixedOpc, Bytes, Size))
181+
if (!nextLEB(PrefixedOpc, Bytes, Size, false))
142182
return MCDisassembler::Fail;
143183
if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
144184
return MCDisassembler::Fail;

‎llvm/test/MC/WebAssembly/objdump.s

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o %t.o -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s
2+
# RUN: llvm-objdump -triple=wasm32-unknown-unknown -disassemble %t.o | FileCheck %s
3+
4+
.section .text.main1,"",@
5+
.type test0,@function
6+
test0:
7+
.functype test0 (i32, i64) -> (i32)
8+
.local f32, f64, v128, v128
9+
local.get 2
10+
end_function
11+
.Lfunc_end0:
12+
.size test0, .Lfunc_end0-test0
13+
14+
.section .text.main2,"",@
15+
.type test1,@function
16+
test1:
17+
.functype test1 (i32, i64) -> (i32)
18+
.local i32, i64, except_ref
19+
local.get 3
20+
end_function
21+
.Lfunc_end1:
22+
.size test1, .Lfunc_end1-test1
23+
24+
25+
# CHECK-LABEL: CODE:
26+
# CHECK: # 2 functions in section.
27+
# CHECK-LABEL: test0:
28+
# CHECK-NEXT: .local f32, f64, v128, v128
29+
# CHECK-NEXT: 9: 20 02 local.get 2
30+
# CHECK-NEXT: b: 0b end_block
31+
# CHECK-LABEL: test1:
32+
# CHECK-NEXT: .local i32, i64, except_ref
33+
# CHECK-NEXT: 14: 20 03 local.get 3
34+
# CHECK-NEXT: 16: 0b end_block

‎llvm/test/MC/WebAssembly/weak-alias.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -210,13 +210,13 @@ entry:
210210
; CHECK-NEXT: ...
211211

212212
; CHECK-SYMS: SYMBOL TABLE:
213-
; CHECK-SYMS-NEXT: 00000000 g F CODE .hidden foo
214-
; CHECK-SYMS-NEXT: 00000001 g F CODE .hidden call_direct
215-
; CHECK-SYMS-NEXT: 00000002 g F CODE .hidden call_alias
216-
; CHECK-SYMS-NEXT: 00000000 gw F CODE .hidden foo_alias
217-
; CHECK-SYMS-NEXT: 00000003 g F CODE .hidden call_direct_ptr
213+
; CHECK-SYMS-NEXT: 00000001 g F CODE .hidden foo
214+
; CHECK-SYMS-NEXT: 00000006 g F CODE .hidden call_direct
215+
; CHECK-SYMS-NEXT: 0000000f g F CODE .hidden call_alias
216+
; CHECK-SYMS-NEXT: 00000001 gw F CODE .hidden foo_alias
217+
; CHECK-SYMS-NEXT: 00000018 g F CODE .hidden call_direct_ptr
218218
; CHECK-SYMS-NEXT: 00000008 g O DATA direct_address
219-
; CHECK-SYMS-NEXT: 00000004 g F CODE .hidden call_alias_ptr
219+
; CHECK-SYMS-NEXT: 0000002b g F CODE .hidden call_alias_ptr
220220
; CHECK-SYMS-NEXT: 00000010 g O DATA alias_address
221221
; CHECK-SYMS-NEXT: 00000000 g O DATA bar
222222
; CHECK-SYMS-NEXT: 00000000 gw O DATA .hidden bar_alias
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
RUN: llvm-objdump -t %p/../Inputs/trivial.obj.wasm | FileCheck %s
22

33
CHECK: SYMBOL TABLE:
4-
CHECK-NEXT: 00000002 g F CODE main
4+
CHECK-NEXT: 00000001 g F CODE main
55
CHECK-NEXT: 00000000 l O DATA .L.str
66
CHECK-NEXT: 00000000 g F *UND* puts
7-
CHECK-NEXT: 00000003 l F CODE .LSomeOtherFunction_bitcast
7+
CHECK-NEXT: 00000019 l F CODE .LSomeOtherFunction_bitcast
88
CHECK-NEXT: 00000000 g F *UND* SomeOtherFunction
99
CHECK-NEXT: 00000010 g O DATA var

‎llvm/tools/llvm-objdump/llvm-objdump.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,12 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
16051605
raw_ostream &DebugOut = nulls();
16061606
#endif
16071607

1608+
// Some targets (like WebAssembly) have a special prelude at the start
1609+
// of each symbol.
1610+
DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
1611+
SectionAddr + Start, DebugOut, CommentStream);
1612+
Start += Size;
1613+
16081614
for (Index = Start; Index < End; Index += Size) {
16091615
MCInst Inst;
16101616

0 commit comments

Comments
 (0)
Please sign in to comment.