diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -361,11 +361,13 @@
                      raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
                      MCContext &Context);
 
-  /// True if the target uses physical regs at Prolog/Epilog insertion
-  /// time. If true (most machines), all vregs must be allocated before
-  /// PEI. If false (virtual-register machines), then callee-save register
-  /// spilling and scavenging are not needed or used.
-  virtual bool usesPhysRegsForPEI() const { return true; }
+  /// True if the target uses physical regs (as nearly all targets do). False
+  /// for stack machines such as WebAssembly and other virtual-register
+  /// machines. If true, all vregs must be allocated before PEI. If false, then
+  /// callee-save register spilling and scavenging are not needed or used. If
+  /// false, implicitly defined registers will still be assumed to be physical
+  /// registers, except that variadic defs will be allocated vregs.
+  virtual bool usesPhysRegsForValues() const { return true; }
 
   /// True if the target wants to use interprocedural register allocation by
   /// default. The -enable-ipra flag can be used to override this.
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -237,7 +237,7 @@
     stashEntryDbgValues(*SaveBlock, EntryDbgValues);
 
   // Handle CSR spilling and restoring, for targets that need it.
-  if (MF.getTarget().usesPhysRegsForPEI())
+  if (MF.getTarget().usesPhysRegsForValues())
     spillCalleeSavedRegs(MF);
 
   // Allow the target machine to make final modifications to the function
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -195,7 +195,10 @@
          "IMPLICIT_DEF should have been handled as a special case elsewhere!");
 
   unsigned NumResults = CountResults(Node);
-  for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+  bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+                             II.isVariadic() && II.variadicOpsAreDefs();
+  unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
+  for (unsigned i = 0; i < NumVRegs; ++i) {
     // If the specific node value is only used by a CopyToReg and the dest reg
     // is a vreg in the same register class, use the CopyToReg'd destination
     // register instead of creating a new vreg.
@@ -828,7 +831,10 @@
   unsigned NumImpUses = 0;
   unsigned NodeOperands =
       countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
-  bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
+  bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+                             II.isVariadic() && II.variadicOpsAreDefs();
+  bool HasPhysRegOuts = NumResults > NumDefs &&
+                        II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs;
 #ifndef NDEBUG
   unsigned NumMIOperands = NodeOperands + NumResults;
   if (II.isVariadic())
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -509,8 +509,8 @@
 
 /// Returns the operand number of a callee, assuming the argument is a call
 /// instruction.
-inline unsigned getCalleeOpNo(unsigned Opc) {
-  switch (Opc) {
+inline const MachineOperand &getCalleeOp(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   case WebAssembly::CALL_VOID:
   case WebAssembly::CALL_VOID_S:
   case WebAssembly::CALL_INDIRECT_VOID:
@@ -519,7 +519,7 @@
   case WebAssembly::RET_CALL_S:
   case WebAssembly::RET_CALL_INDIRECT:
   case WebAssembly::RET_CALL_INDIRECT_S:
-    return 0;
+    return MI.getOperand(0);
   case WebAssembly::CALL_i32:
   case WebAssembly::CALL_i32_S:
   case WebAssembly::CALL_i64:
@@ -564,7 +564,10 @@
   case WebAssembly::CALL_INDIRECT_v2f64_S:
   case WebAssembly::CALL_INDIRECT_exnref:
   case WebAssembly::CALL_INDIRECT_exnref_S:
-    return 1;
+    return MI.getOperand(1);
+  case WebAssembly::CALL:
+  case WebAssembly::CALL_S:
+    return MI.getOperand(MI.getNumDefs());
   default:
     llvm_unreachable("Not a call instruction");
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -15,6 +15,7 @@
 
 HANDLE_NODETYPE(CALL1)
 HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(CALL)
 HANDLE_NODETYPE(RET_CALL)
 HANDLE_NODETYPE(RETURN)
 HANDLE_NODETYPE(ARGUMENT)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -206,6 +206,28 @@
     }
     break;
   }
+  case WebAssemblyISD::CALL: {
+    // CALL has both variable operands and variable results, but ISel only
+    // supports one or the other. Split calls into two nodes connected by a
+    // dummy value, one for the operands and one for the results. These two
+    // nodes will be recombined in a custom inserter hook into a single
+    // MachineInstr.
+    SmallVector<SDValue, 16> Ops;
+    for (size_t i = 1; i < Node->getNumOperands(); ++i) {
+      SDValue Op = Node->getOperand(i);
+      if (Op->getOpcode() == WebAssemblyISD::Wrapper)
+        Op = Op->getOperand(0);
+      Ops.push_back(Op);
+    }
+    Ops.push_back(Node->getOperand(0));
+    MachineSDNode *CallParams =
+        CurDAG->getMachineNode(WebAssembly::CALL_PARAMS, DL, MVT::i32, Ops);
+    SDValue Link(CallParams, 0);
+    MachineSDNode *CallResults = CurDAG->getMachineNode(
+        WebAssembly::CALL_RESULTS, DL, Node->getVTList(), Link);
+    ReplaceNode(Node, CallResults);
+    return;
+  }
   default:
     break;
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -427,6 +427,29 @@
   return DoneMBB;
 }
 
+static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
+                                           DebugLoc DL, MachineBasicBlock *BB,
+                                           const TargetInstrInfo &TII) {
+  MachineInstr &CallParams = *CallResults.getPrevNode();
+  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
+  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS);
+
+  MachineFunction &MF = *BB->getParent();
+  const MCInstrDesc &MCID = TII.get(WebAssembly::CALL);
+  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
+
+  for (auto Def : CallResults.defs())
+    MIB.add(Def);
+  for (auto Use : CallParams.uses())
+    MIB.add(Use);
+
+  BB->insert(CallResults.getIterator(), MIB);
+  CallParams.eraseFromParent();
+  CallResults.eraseFromParent();
+
+  return BB;
+}
+
 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *BB) const {
   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
@@ -459,7 +482,8 @@
   case WebAssembly::FP_TO_UINT_I64_F64:
     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                         WebAssembly::I64_TRUNC_U_F64);
-    llvm_unreachable("Unexpected instruction to emit with custom inserter");
+  case WebAssembly::CALL_RESULTS:
+    return LowerCallResults(MI, DL, BB, TII);
   }
 }
 
@@ -695,9 +719,6 @@
   }
 
   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
-  if (Ins.size() > 1)
-    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");
-
   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
 
@@ -843,18 +864,27 @@
   }
   InTys.push_back(MVT::Other);
 
-  SDVTList InTyList = DAG.getVTList(InTys);
-  SDValue Res =
-      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
-                  DL, InTyList, Ops);
-  if (Ins.empty()) {
-    Chain = Res;
-  } else {
-    InVals.push_back(Res);
-    Chain = Res.getValue(1);
+  unsigned Opc;
+  // TODO: Remove CALL0 and CALL1 in favor of CALL
+  switch (Ins.size()) {
+  case 0:
+    Opc = WebAssemblyISD::CALL0;
+    break;
+  case 1:
+    Opc = WebAssemblyISD::CALL1;
+    break;
+  default:
+    Opc = WebAssemblyISD::CALL;
+    break;
   }
+  SDVTList InTyList = DAG.getVTList(InTys);
+  SDValue Res = DAG.getNode(Opc, DL, InTyList, Ops);
 
-  return Chain;
+  for (size_t I = 0; I < Ins.size(); ++I)
+    InVals.push_back(Res.getValue(I));
+
+  // Return the chain
+  return Res.getValue(Ins.size());
 }
 
 bool WebAssemblyTargetLowering::CanLowerReturn(
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -54,7 +54,31 @@
     Requires;
 }
 
+// CALL should take both variadic arguments and produce variadic results, but
+// this is not possible to model directly. Instead, we select calls to a
+// CALL_PARAMS taking variadic arguments linked with a CALL_RESULTS that handles
+// producing the call's variadic results. We recombine the two in a custom
+// inserter hook after DAG ISel, so passes over MachineInstrs will only ever
+// observe CALL nodes with all of the expected variadic uses and defs.
+let isPseudo = 1 in
+defm CALL_PARAMS :
+  I<(outs I32:$link), (ins function32_op:$callee, variable_ops),
+    (outs), (ins function32_op:$callee), [],
+    "call_params\t$callee", "call_params\t$callee", -1>;
+
+let variadicOpsAreDefs = 1, usesCustomInserter = 1, isPseudo = 1 in
+defm CALL_RESULTS :
+  I<(outs), (ins I32:$link, variable_ops), (outs), (ins), [],
+    "call_results", "call_results", -1>;
+
 let Uses = [SP32, SP64], isCall = 1 in {
+
+// TODO: Add an indirect version of the variadic call, delete CALL_*
+defm CALL :
+  I<(outs), (ins function32_op:$callee, variable_ops),
+    (outs), (ins function32_op:$callee), [],
+    "call\t$callee", "call\t$callee", 0x10>;
+
 defm "" : CALL;
 defm "" : CALL;
 defm "" : CALL;
@@ -68,6 +92,7 @@
 defm "" : CALL;
 
 let IsCanonical = 1 in {
+
 defm CALL_VOID :
   I<(outs), (ins function32_op:$callee, variable_ops),
     (outs), (ins function32_op:$callee),
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -135,12 +135,12 @@
 // Determine whether a call to the callee referenced by
 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
 // effects.
-static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
-                        bool &Write, bool &Effects, bool &StackPointer) {
+static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write,
+                        bool &Effects, bool &StackPointer) {
   // All calls can use the stack pointer.
   StackPointer = true;
 
-  const MachineOperand &MO = MI.getOperand(CalleeOpNo);
+  const MachineOperand &MO = WebAssembly::getCalleeOp(MI);
   if (MO.isGlobal()) {
     const Constant *GV = MO.getGlobal();
     if (const auto *GA = dyn_cast<GlobalAlias>(GV))
@@ -252,8 +252,7 @@
 
   // Analyze calls.
   if (MI.isCall()) {
-    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
-    queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
+    queryCallee(MI, Read, Write, Effects, StackPointer);
   }
 }
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -47,7 +47,7 @@
 
   TargetTransformInfo getTargetTransformInfo(const Function &F) override;
 
-  bool usesPhysRegsForPEI() const override { return false; }
+  bool usesPhysRegsForValues() const override { return false; }
 
   yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
   yaml::MachineFunctionInfo *
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -49,7 +49,7 @@
   if (!MI.isCall())
     return false;
 
-  const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode()));
+  const MachineOperand &MO = getCalleeOp(MI);
   assert(MO.isGlobal() || MO.isSymbol());
 
   if (MO.isSymbol()) {
diff --git a/llvm/test/CodeGen/WebAssembly/multivalue.ll b/llvm/test/CodeGen/WebAssembly/multivalue.ll
--- a/llvm/test/CodeGen/WebAssembly/multivalue.ll
+++ b/llvm/test/CodeGen/WebAssembly/multivalue.ll
@@ -1,29 +1,116 @@
-; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+multivalue | FileCheck %s
-; RUN: llc < %s --filetype=obj -mattr=+multivalue | obj2yaml | FileCheck %s --check-prefix OBJ
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+multivalue,+tail-call | FileCheck %s
+; RUN: llc < %s --filetype=obj -mattr=+multivalue,+tail-call | obj2yaml | FileCheck %s --check-prefix OBJ
 
-; Test that the multivalue returns, function types, and block types
-; work as expected.
+; Test that the multivalue calls, returns, function types, and block
+; types work as expected.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-%pair = type { i32, i32 }
-%packed_pair = type <{ i32, i32 }>
+%pair = type { i32, i64 }
+%packed_pair = type <{ i32, i64 }>
+
+
+; CHECK-LABEL: pair_const:
+; CHECK-NEXT: .functype pair_const () -> (i32, i64)
+; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 42{{$}}
+; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 42{{$}}
+; CHECK-NEXT: return $pop[[L0]], $pop[[L1]]{{$}}
+define %pair @pair_const() {
+  ret %pair { i32 42, i64 42 }
+}
+
+; CHECK-LABEL: packed_pair_const:
+; CHECK-NEXT: .functype packed_pair_const () -> (i32, i64)
+; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 42{{$}}
+; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 42{{$}}
+; CHECK-NEXT: return $pop[[L0]], $pop[[L1]]{{$}}
+define %packed_pair @packed_pair_const() {
+  ret %packed_pair <{ i32 42, i64 42 }>
+}
 
 ; CHECK-LABEL: pair_ident:
-; CHECK-NEXT: .functype pair_ident (i32, i32) -> (i32, i32)
+; CHECK-NEXT: .functype pair_ident (i32, i64) -> (i32, i64)
 ; CHECK-NEXT: return $0, $1{{$}}
 define %pair @pair_ident(%pair %p) {
   ret %pair %p
 }
 
 ; CHECK-LABEL: packed_pair_ident:
-; CHECK-NEXT: .functype packed_pair_ident (i32, i32) -> (i32, i32)
+; CHECK-NEXT: .functype packed_pair_ident (i32, i64) -> (i32, i64)
 ; CHECK-NEXT: return $0, $1{{$}}
 define %packed_pair @packed_pair_ident(%packed_pair %p) {
   ret %packed_pair %p
 }
 
+;; TODO: Multivalue calls are a WIP and do not necessarily produce
+;; correct output. For now, just check that they don't cause any
+;; crashes.
+
+define void @pair_call() {
+  %p = call %pair @pair_const()
+  ret void
+}
+
+define void @packed_pair_call() {
+  %p = call %packed_pair @packed_pair_const()
+  ret void
+}
+
+define %pair @pair_call_return() {
+  %p = call %pair @pair_const()
+  ret %pair %p
+}
+
+define %packed_pair @packed_pair_call_return() {
+  %p = call %packed_pair @packed_pair_const()
+  ret %packed_pair %p
+}
+
+define %pair @pair_tail_call() {
+  %p = musttail call %pair @pair_const()
+  ret %pair %p
+}
+
+define %packed_pair @packed_pair_tail_call() {
+  %p = musttail call %packed_pair @packed_pair_const()
+  ret %packed_pair %p
+}
+
+define i32 @pair_call_return_first() {
+  %p = call %pair @pair_const()
+  %v = extractvalue %pair %p, 0
+  ret i32 %v
+}
+
+define i32 @packed_pair_call_return_first() {
+  %p = call %packed_pair @packed_pair_const()
+  %v = extractvalue %packed_pair %p, 0
+  ret i32 %v
+}
+
+define i64 @pair_call_return_second() {
+  %p = call %pair @pair_const()
+  %v = extractvalue %pair %p, 1
+  ret i64 %v
+}
+
+define i64 @packed_pair_call_return_second() {
+  %p = call %packed_pair @packed_pair_const()
+  %v = extractvalue %packed_pair %p, 1
+  ret i64 %v
+}
+
+define %pair @pair_pass_through(%pair %p) {
+  %r = call %pair @pair_ident(%pair %p)
+  ret %pair %r
+}
+
+define %packed_pair @packed_pair_pass_through(%packed_pair %p) {
+  %r = call %packed_pair @packed_pair_ident(%packed_pair %p)
+  ret %packed_pair %r
+}
+
 ; CHECK-LABEL: minimal_loop:
 ; CHECK-NEXT: .functype minimal_loop (i32) -> (i32, i64)
 ; CHECK-NEXT: .LBB{{[0-9]+}}_1:
@@ -31,7 +118,7 @@
 ; CHECK-NEXT: br 0{{$}}
 ; CHECK-NEXT: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: end_loop{{$}}
-define {i32, i64} @minimal_loop(i32* %p) {
+define %pair @minimal_loop(i32* %p) {
 entry:
   br label %loop
 loop:
@@ -39,21 +126,40 @@
 }
 
 ; CHECK-LABEL: .section .custom_section.target_features
-; CHECK-NEXT: .int8 1
+; CHECK-NEXT: .int8 2
 ; CHECK-NEXT: .int8 43
 ; CHECK-NEXT: .int8 10
 ; CHECK-NEXT: .ascii "multivalue"
+; CHECK-NEXT: .int8 43
+; CHECK-NEXT: .int8 9
+; CHECK-NEXT: .ascii "tail-call"
.ascii "tail-call" ; OBJ-LABEL: - Type: TYPE ; OBJ-NEXT: Signatures: ; OBJ-NEXT: - Index: 0 -; OBJ-NEXT: ParamTypes: +; OBJ-NEXT: ParamTypes: [] +; OBJ-NEXT: ReturnTypes: ; OBJ-NEXT: - I32 +; OBJ-NEXT: - I64 +; OBJ-NEXT: - Index: 1 +; OBJ-NEXT: ParamTypes: ; OBJ-NEXT: - I32 +; OBJ-NEXT: - I64 ; OBJ-NEXT: ReturnTypes: ; OBJ-NEXT: - I32 +; OBJ-NEXT: - I64 +; OBJ-NEXT: - Index: 2 +; OBJ-NEXT: ParamTypes: [] +; OBJ-NEXT: ReturnTypes: [] +; OBJ-NEXT: - Index: 3 +; OBJ-NEXT: ParamTypes: [] +; OBJ-NEXT: ReturnTypes: ; OBJ-NEXT: - I32 -; OBJ-NEXT: - Index: 1 +; OBJ-NEXT: - Index: 4 +; OBJ-NEXT: ParamTypes: [] +; OBJ-NEXT: ReturnTypes: +; OBJ-NEXT: - I64 +; OBJ-NEXT: - Index: 5 ; OBJ-NEXT: ParamTypes: ; OBJ-NEXT: - I32 ; OBJ-NEXT: ReturnTypes: