diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -348,6 +348,7 @@
     IO.enumCase(ID, "default", TargetStackID::Default);
     IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
     IO.enumCase(ID, "scalable-vector", TargetStackID::ScalableVector);
+    IO.enumCase(ID, "wasm-local", TargetStackID::WasmLocal);
    IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
  }
};
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -24,12 +24,13 @@
 class RegScavenger;

 namespace TargetStackID {
-  enum Value {
-    Default = 0,
-    SGPRSpill = 1,
-    ScalableVector = 2,
-    NoAlloc = 255
-  };
+enum Value {
+  Default = 0,
+  SGPRSpill = 1,
+  ScalableVector = 2,
+  WasmLocal = 3,
+  NoAlloc = 255
+};
 }

 /// Information about stack frame layout on the target. It holds the direction
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -661,6 +661,7 @@
   case TargetStackID::SGPRSpill:
     return true;
   case TargetStackID::ScalableVector:
+  case TargetStackID::WasmLocal:
     return false;
   }
   llvm_unreachable("Invalid TargetStackID::Value");
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1099,6 +1099,7 @@
     return true;
   case TargetStackID::NoAlloc:
   case TargetStackID::SGPRSpill:
+  case TargetStackID::WasmLocal:
     return false;
   }
   llvm_unreachable("Invalid TargetStackID::Value");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -239,8 +239,10 @@
     Changed = true;
   }

-  // Start assigning local numbers after the last parameter.
+  // Start assigning local numbers after the last parameter and after any
+  // already-assigned locals.
   unsigned CurLocal = static_cast<unsigned>(MFI.getParams().size());
+  CurLocal += static_cast<unsigned>(MFI.getLocals().size());

   // Precompute the set of registers that are unused, so that we can insert
   // drops to their defs.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -43,6 +43,7 @@
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  bool isSupportedStackID(TargetStackID::Value ID) const override;
   DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override;

   bool needsPrologForEH(const MachineFunction &MF) const;
@@ -53,6 +54,11 @@
                        MachineBasicBlock::iterator &InsertStore,
                        const DebugLoc &DL) const;

+  // Returns the index of the WebAssembly local to which the stack object
+  // FrameIndex in MF should be allocated, or None.
+  static Optional<unsigned> getLocalForStackObject(MachineFunction &MF,
+                                                   int FrameIndex);
+
   static unsigned getSPReg(const MachineFunction &MF);
   static unsigned getFPReg(const MachineFunction &MF);
   static unsigned getOpcConst(const MachineFunction &MF);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -25,11 +25,13 @@
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
@@ -39,6 +41,52 @@
 // TODO: wasm64
 // TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions

+// In an ideal world, when objects are added to the MachineFrameInfo by
+// FunctionLoweringInfo::set, we could somehow hook into target-specific code to
+// ensure they are assigned the right stack ID. However, there isn't a hook that
+// runs between then and DAG building time, so instead we hoist stack objects
+// lazily when they are first used, and comprehensively after the DAG is built
+// via the PreprocessISelDAG hook, called by
+// SelectionDAGISel::runOnMachineFunction. We have to do it in two places
+// because we want to do it while building the selection DAG for uses of alloca,
+// but not all alloca instructions are used, so we have to follow up afterwards.
+Optional<unsigned>
+WebAssemblyFrameLowering::getLocalForStackObject(MachineFunction &MF,
+                                                 int FrameIndex) {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // If already hoisted to a local, done.
+  if (MFI.getStackID(FrameIndex) == TargetStackID::WasmLocal)
+    return static_cast<unsigned>(MFI.getObjectOffset(FrameIndex));
+
+  // If not allocated in the object address space, this object will be in
+  // linear memory.
+  const AllocaInst *AI = MFI.getObjectAllocation(FrameIndex);
+  if (!AI ||
+      !WebAssembly::isWasmVarAddressSpace(AI->getType()->getAddressSpace()))
+    return None;
+
+  // Otherwise, allocate this object in the named value stack, outside of linear
+  // memory.
+  SmallVector<EVT, 4> ValueVTs;
+  const WebAssemblyTargetLowering &TLI =
+      *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
+  WebAssemblyFunctionInfo *FuncInfo = MF.getInfo<WebAssemblyFunctionInfo>();
+  ComputeValueVTs(TLI, MF.getDataLayout(), AI->getAllocatedType(), ValueVTs);
+  MFI.setStackID(FrameIndex, TargetStackID::WasmLocal);
+  // Abuse SP offset to record the index of the first local in the object.
+  unsigned Local = FuncInfo->getParams().size() + FuncInfo->getLocals().size();
+  MFI.setObjectOffset(FrameIndex, Local);
+  // Allocate WebAssembly locals for each non-aggregate component of the
+  // allocation.
+  for (EVT ValueVT : ValueVTs)
+    FuncInfo->addLocal(ValueVT.getSimpleVT());
+  // Abuse object size to record number of WebAssembly locals allocated to
+  // this object.
+  MFI.setObjectSize(FrameIndex, ValueVTs.size());
+  return static_cast<unsigned>(Local);
+}
+
 /// We need a base pointer in the case of having items on the stack that
 /// require stricter alignment than the stack pointer itself. Because we need
 /// to shift the stack pointer by some unknown amount to force the alignment,
@@ -314,6 +362,16 @@
   writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
 }

+bool WebAssemblyFrameLowering::isSupportedStackID(
+    TargetStackID::Value ID) const {
+  // Use the Object stack for WebAssembly locals, which can only be accessed
+  // by name, not via an address in linear memory.
+  if (ID == TargetStackID::WasmLocal)
+    return true;
+
+  return TargetFrameLowering::isSupportedStackID(ID);
+}
+
 TargetFrameLowering::DwarfFrameBase
 WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
   DwarfFrameBase Loc;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -17,6 +17,8 @@
 HANDLE_NODETYPE(RET_CALL)
 HANDLE_NODETYPE(RETURN)
 HANDLE_NODETYPE(ARGUMENT)
+HANDLE_NODETYPE(LOCAL_GET)
+HANDLE_NODETYPE(LOCAL_SET)
 // A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol
 HANDLE_NODETYPE(Wrapper)
 // A special wapper used in PIC code for __memory_base/__table_base relative
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssembly.h"
 #include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h" // To access function attributes.
@@ -56,6 +57,8 @@
     return SelectionDAGISel::runOnMachineFunction(MF);
   }

+  void PreprocessISelDAG() override;
+
   void Select(SDNode *Node) override;

   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
@@ -69,6 +72,18 @@
 };
 } // end anonymous namespace

+void WebAssemblyDAGToDAGISel::PreprocessISelDAG() {
+  // Stack objects that should be allocated to locals are hoisted to WebAssembly
+  // locals when they are first used. However, for those without uses, we hoist
+  // them here. It would be nice if there were some hook to do this when they
+  // are added to the MachineFrameInfo, but that's not the case right now.
+  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+  for (int Idx = 0; Idx < FrameInfo.getObjectIndexEnd(); Idx++)
+    WebAssemblyFrameLowering::getLocalForStackObject(*MF, Idx);
+
+  SelectionDAGISel::PreprocessISelDAG();
+}
+
 void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1276,6 +1276,15 @@
   return false;
 }

+static Optional<unsigned> IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) {
+  const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
+  if (!FI)
+    return None;
+
+  auto &MF = DAG.getMachineFunction();
+  return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
+}
+
 SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
                                               SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -1295,6 +1304,17 @@
                         SN->getMemoryVT(), SN->getMemOperand());
   }

+  if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
+    if (!Offset->isUndef())
+      report_fatal_error("unexpected offset when storing to webassembly local",
+                         false);
+
+    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
+    SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
+    SDValue Ops[] = {SN->getChain(), Idx, Value};
+    return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
+  }
+
   return Op;
 }

@@ -1316,6 +1336,20 @@
                         LN->getMemoryVT(), LN->getMemOperand());
   }

+  if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
+    if (!Offset->isUndef())
+      report_fatal_error(
+          "unexpected offset when loading from webassembly local", false);
+
+    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
+    EVT LocalVT = LN->getValueType(0);
+    SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
+                                   {LN->getChain(), Idx});
+    SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
+    assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
+    return Result;
+  }
+
   return Op;
 }

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -72,6 +72,8 @@
     SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
 def SDT_WebAssemblyBrTable    : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
 def SDT_WebAssemblyArgument   : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
+def SDT_WebAssemblyLocalGet   : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
+def SDT_WebAssemblyLocalSet   : SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>;
 def SDT_WebAssemblyReturn     : SDTypeProfile<0, -1, []>;
 def SDT_WebAssemblyWrapper    : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisPtrTy<0>]>;
@@ -114,6 +116,12 @@
 def WebAssemblyglobal_set :
     SDNode<"WebAssemblyISD::GLOBAL_SET", SDT_WebAssemblyGlobalSet,
            [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def WebAssemblylocal_get :
+    SDNode<"WebAssemblyISD::LOCAL_GET", SDT_WebAssemblyLocalGet,
+           [SDNPHasChain, SDNPMayLoad]>;
+def WebAssemblylocal_set :
+    SDNode<"WebAssemblyISD::LOCAL_SET", SDT_WebAssemblyLocalSet,
+           [SDNPHasChain, SDNPMayStore]>;

 //===----------------------------------------------------------------------===//
 // WebAssembly-specific Operands.
@@ -332,6 +340,10 @@
   def : Pat<(WebAssemblyglobal_set vt:$src, (WebAssemblywrapper tglobaladdr:$addr)),
             (!cast<NI>("GLOBAL_SET_" # rc) tglobaladdr:$addr, vt:$src)>;
+  def : Pat<(vt (WebAssemblylocal_get (i32 timm:$local))),
+            (!cast<NI>("LOCAL_GET_" # rc) timm:$local)>;
+  def : Pat<(WebAssemblylocal_set timm:$local, vt:$src),
+            (!cast<NI>("LOCAL_SET_" # rc) timm:$local, vt:$src)>;
 }
 }
 defm "" : LOCAL<I32>;
diff --git a/llvm/test/CodeGen/WebAssembly/ir-locals-stackid.ll b/llvm/test/CodeGen/WebAssembly/ir-locals-stackid.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ir-locals-stackid.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=wasm32-unknown-unknown -asm-verbose=false < %s | FileCheck %s --check-prefix=CHECKCG
+; RUN: llc -mtriple=wasm32-unknown-unknown -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECKISEL
+
+%f32_cell = type float addrspace(1)*
+
+; CHECKISEL-LABEL: name: ir_local_f32
+; CHECKISEL: stack:
+; CHECKISEL: id: 0, name: retval, type: default, offset: 1, size: 1, alignment: 4,
+; CHECKISEL-NEXT: stack-id: wasm-local
+
+; CHECKCG-LABEL: ir_local_f32:
+; CHECKCG-NEXT: .functype ir_local_f32 (f32) -> (f32)
+; CHECKCG-NEXT: .local f32
+; CHECKCG-NEXT: local.get 0
+; CHECKCG-NEXT: local.set 1
+
+define float @ir_local_f32(float %arg) {
+  %retval = alloca float, addrspace(1)
+  store float %arg, %f32_cell %retval
+  %reloaded = load float, %f32_cell %retval
+  ret float %reloaded
+}
diff --git a/llvm/test/CodeGen/WebAssembly/ir-locals.ll b/llvm/test/CodeGen/WebAssembly/ir-locals.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ir-locals.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false | FileCheck %s
+
+%i32_cell = type i32 addrspace(1)*
+%i64_cell = type i64 addrspace(1)*
+%f32_cell = type float addrspace(1)*
+%f64_cell = type double addrspace(1)*
+
+; We have a set of tests in which we set a local and then reload the
+; local. If the load immediately follows the set, the DAG combiner will
+; infer that the reloaded value is the same value that was set, which
+; isn't what we want to test. To inhibit this optimization, we include
+; an opaque call between the store and the load.
+declare void @inhibit_store_to_load_forwarding()
+
+define i32 @ir_local_i32(i32 %arg) {
+ ; CHECK-LABEL: ir_local_i32:
+ ; CHECK-NEXT: .functype ir_local_i32 (i32) -> (i32)
+ %retval = alloca i32, addrspace(1)
+ ; CHECK-NEXT: .local i32
+ store i32 %arg, %i32_cell %retval
+ ; CHECK-NEXT: local.get 0
+ ; CHECK-NEXT: local.set 1
+ call void @inhibit_store_to_load_forwarding()
+ ; CHECK-NEXT: call inhibit_store_to_load_forwarding
+ %reloaded = load i32, %i32_cell %retval
+ ; CHECK-NEXT: local.get 1
+ ret i32 %reloaded
+ ; CHECK-NEXT: end_function
+}
+
+define i64 @ir_local_i64(i64 %arg) {
+ ; CHECK-LABEL: ir_local_i64:
+ ; CHECK-NEXT: .functype ir_local_i64 (i64) -> (i64)
+ %retval = alloca i64, addrspace(1)
+ ; CHECK-NEXT: .local i64
+ store i64 %arg, %i64_cell %retval
+ ; CHECK-NEXT: local.get 0
+ ; CHECK-NEXT: local.set 1
+ call void @inhibit_store_to_load_forwarding()
+ ; CHECK-NEXT: call inhibit_store_to_load_forwarding
+ %reloaded = load i64, %i64_cell %retval
+ ; See note in ir_local_i32.
+ ; CHECK-NEXT: local.get 1
+ ret i64 %reloaded
+ ; CHECK-NEXT: end_function
+}
+
+define float @ir_local_f32(float %arg) {
+ ; CHECK-LABEL: ir_local_f32:
+ ; CHECK-NEXT: .functype ir_local_f32 (f32) -> (f32)
+ %retval = alloca float, addrspace(1)
+ ; CHECK-NEXT: .local f32
+ store float %arg, %f32_cell %retval
+ ; CHECK-NEXT: local.get 0
+ ; CHECK-NEXT: local.set 1
+ call void @inhibit_store_to_load_forwarding()
+ ; CHECK-NEXT: call inhibit_store_to_load_forwarding
+ %reloaded = load float, %f32_cell %retval
+ ; CHECK-NEXT: local.get 1
+ ; CHECK-NEXT: end_function
+ ret float %reloaded
+}
+
+define double @ir_local_f64(double %arg) {
+ ; CHECK-LABEL: ir_local_f64:
+ ; CHECK-NEXT: .functype ir_local_f64 (f64) -> (f64)
+ %retval = alloca double, addrspace(1)
+ ; CHECK-NEXT: .local f64
+ store double %arg, %f64_cell %retval
+ ; CHECK-NEXT: local.get 0
+ ; CHECK-NEXT: local.set 1
+ call void @inhibit_store_to_load_forwarding()
+ ; CHECK-NEXT: call inhibit_store_to_load_forwarding
+ %reloaded = load double, %f64_cell %retval
+ ; CHECK-NEXT: local.get 1
+ ; CHECK-NEXT: end_function
+ ret double %reloaded
+}
+
+define void @ir_unreferenced_local() {
+ ; CHECK-LABEL: ir_unreferenced_local:
+ ; CHECK-NEXT: .functype ir_unreferenced_local () -> ()
+ %unused = alloca i32, addrspace(1)
+ ; CHECK-NEXT: .local i32
+ ret void
+ ; CHECK-NEXT: end_function
+}
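
For context, a minimal usage sketch (not part of the patch; the function and value names below are illustrative only): an alloca placed in the WebAssembly "var" address space, addrspace(1) as in the tests above, should be assigned the wasm-local stack ID and lowered to local.get/local.set rather than to loads and stores against linear memory.

; Hypothetical example, assuming the addrspace(1) convention used by the
; tests above: %counter is expected to become a WebAssembly local, so the
; updates below compile to local.get/local.set with no linear-memory traffic.
define i32 @sum_three(i32 %a, i32 %b, i32 %c) {
  %counter = alloca i32, addrspace(1)
  store i32 %a, i32 addrspace(1)* %counter
  %t0 = load i32, i32 addrspace(1)* %counter
  %t1 = add i32 %t0, %b
  store i32 %t1, i32 addrspace(1)* %counter
  %t2 = load i32, i32 addrspace(1)* %counter
  %t3 = add i32 %t2, %c
  ret i32 %t3
}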