Index: lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp =================================================================== --- lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -136,6 +136,24 @@ // Much like WebAssemblyAsmPrinter in the backend, we have to own these. std::vector> Signatures; + // Order of labels, directives and instructions in a .s file has no + // syntactical enforcement. This class is a callback from the actual parser, + // and yet we have to be feeding data to the streamer in a very particular + // order to ensure a correct binary encoding that matches the regular backend + // (the streamer does not enforce this). This "state machine" enum helps + // guarantee that correct order. + enum ParserState { + FileStart, + Label, + FunctionStart, + FunctionLocals, + Instructions, + } CurrentState = FileStart; + + // We track this to see if a .functype following a label is the same, + // as this is how we recognize the start of a function. + MCSymbol *LastLabel = nullptr; + public: WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) @@ -334,6 +352,11 @@ return false; } + void onLabelParsed(MCSymbol *Symbol) override { + LastLabel = Symbol; + CurrentState = Label; + } + // This function processes wasm-specific directives streamed to // WebAssemblyTargetStreamer, all others go to the generic parser // (see WasmAsmParser). @@ -370,10 +393,19 @@ TOut.emitGlobalType(WasmSym); return Expect(AsmToken::EndOfStatement, "EOL"); } else if (DirectiveID.getString() == ".functype") { + // This code has to send things to the streamer similar to + // WebAssemblyAsmPrinter::EmitFunctionBodyStart. + // TODO: would be good to factor this into a common function, but the + // assembler and backend really don't share any common code, and this code + // parses the locals separately. 
auto SymName = ExpectIdent(); if (SymName.empty()) return true; auto WasmSym = cast( TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); + if (CurrentState == Label && WasmSym == LastLabel) { + // This .functype indicates a start of a function. + CurrentState = FunctionStart; + } auto Signature = make_unique(); if (Expect(AsmToken::LParen, "(")) return true; if (ParseRegTypeList(Signature->Params)) return true; @@ -386,11 +418,16 @@ addSignature(std::move(Signature)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); TOut.emitFunctionType(WasmSym); + // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. return Expect(AsmToken::EndOfStatement, "EOL"); } else if (DirectiveID.getString() == ".local") { + if (CurrentState != FunctionStart) + return Error(".local directive should follow the start of a function", + Lexer.getTok()); SmallVector Locals; if (ParseRegTypeList(Locals)) return true; TOut.emitLocal(Locals); + CurrentState = FunctionLocals; return Expect(AsmToken::EndOfStatement, "EOL"); } return true; // We didn't process this directive. @@ -405,6 +442,16 @@ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { case Match_Success: { + if (CurrentState == FunctionStart) { + // This is the first instruction in a function, but we haven't seen + // a .local directive yet. The streamer requires locals to be encoded + // as a prelude to the instructions, so emit an empty list of locals + // here. 
+ auto &TOut = reinterpret_cast( + *Out.getTargetStreamer()); + TOut.emitLocal(SmallVector()); + } + CurrentState = Instructions; Out.EmitInstruction(Inst, getSTI()); return false; } Index: test/MC/WebAssembly/assembler-binary.ll =================================================================== --- /dev/null +++ test/MC/WebAssembly/assembler-binary.ll @@ -0,0 +1,91 @@ +; RUN: llc -filetype=asm -asm-verbose=false %s -o - | FileCheck -check-prefix=ASM %s +; RUN: llc -filetype=asm -asm-verbose=false %s -o - | llvm-mc -triple=wasm32-unknown-unknown -filetype=asm -o - | FileCheck -check-prefix=ASM %s +; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -filetype=asm -asm-verbose=false %s -o - | llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o - | obj2yaml | FileCheck %s + +; This specifically tests that we can generate a binary from the assembler +; that produces the same binary as the backend would. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare void @bar() + +define void @foo(i32 %n) { +entry: + call void @bar() + ret void +} + +; Checking assembly is not the point of this test, but if something breaks +; it is easier to spot it here than in the yaml output. 
+ +; ASM: .text +; ASM: .file "assembler-binary.ll" +; ASM: .globl foo +; ASM: foo: +; ASM-NEXT: .functype foo (i32) -> () +; ASM-NEXT: call bar@FUNCTION +; ASM-NEXT: end_function +; ASM-NEXT: .Lfunc_end0: +; ASM-NEXT: .size foo, .Lfunc_end0-foo +; ASM: .functype bar () -> () + + +; CHECK: --- !WASM +; CHECK-NEXT: FileHeader: +; CHECK-NEXT: Version: 0x00000001 +; CHECK-NEXT: Sections: +; CHECK-NEXT: - Type: TYPE +; CHECK-NEXT: Signatures: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: ReturnType: NORESULT +; CHECK-NEXT: ParamTypes: +; CHECK-NEXT: - I32 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: ReturnType: NORESULT +; CHECK-NEXT: ParamTypes: [] +; CHECK-NEXT: - Type: IMPORT +; CHECK-NEXT: Imports: +; CHECK-NEXT: - Module: env +; CHECK-NEXT: Field: __linear_memory +; CHECK-NEXT: Kind: MEMORY +; CHECK-NEXT: Memory: +; CHECK-NEXT: Initial: 0x00000000 +; CHECK-NEXT: - Module: env +; CHECK-NEXT: Field: __indirect_function_table +; CHECK-NEXT: Kind: TABLE +; CHECK-NEXT: Table: +; CHECK-NEXT: ElemType: ANYFUNC +; CHECK-NEXT: Limits: +; CHECK-NEXT: Initial: 0x00000000 +; CHECK-NEXT: - Module: env +; CHECK-NEXT: Field: bar +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: SigIndex: 1 +; CHECK-NEXT: - Type: FUNCTION +; CHECK-NEXT: FunctionTypes: [ 0 ] +; CHECK-NEXT: - Type: CODE +; CHECK-NEXT: Relocations: +; CHECK-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB +; CHECK-NEXT: Index: 1 +; CHECK-NEXT: Offset: 0x00000004 +; CHECK-NEXT: Functions: +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 1080808080000B +; CHECK-NEXT: - Type: CUSTOM +; CHECK-NEXT: Name: linking +; CHECK-NEXT: Version: 1 +; CHECK-NEXT: SymbolTable: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Name: foo +; CHECK-NEXT: Flags: [ ] +; CHECK-NEXT: Function: 1 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Name: bar +; CHECK-NEXT: Flags: [ UNDEFINED ] +; CHECK-NEXT: Function: 0 +; CHECK-NEXT: ...