diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -348,6 +348,7 @@ IO.enumCase(ID, "default", TargetStackID::Default); IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill); IO.enumCase(ID, "scalable-vector", TargetStackID::ScalableVector); + IO.enumCase(ID, "object", TargetStackID::Object); IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc); } }; diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -24,12 +24,13 @@ class RegScavenger; namespace TargetStackID { - enum Value { - Default = 0, - SGPRSpill = 1, - ScalableVector = 2, - NoAlloc = 255 - }; +enum Value { + Default = 0, + SGPRSpill = 1, + ScalableVector = 2, + Object = 3, + NoAlloc = 255 +}; } /// Information about stack frame layout on the target. It holds the direction diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h @@ -15,10 +15,13 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H #define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H +#include "llvm/ADT/Optional.h" + namespace llvm { class MachineBasicBlock; class MachineInstr; +class MachineFunction; class MachineOperand; class MCContext; class MCSymbolWasm; @@ -28,6 +31,26 @@ namespace WebAssembly { +enum WasmAddressSpace : unsigned { + // Default address space, for pointers to unmanaged data in linear memory + // (stack, heap, data). 
+ WASM_ADDRESS_SPACE_DEFAULT = 0, + // A non-integral address space for pointers to named objects outside of + // linear memory: static-storage-duration globals, or + // automatic-storage-duration locals. Loads and stores to these pointers are + // lowered to global.get / global.set or local.get / local.set, as + // appropriate. + WASM_ADDRESS_SPACE_OBJECT = 1 +}; + +inline bool isObjectAddressSpace(unsigned AS) { + return AS == WebAssembly::WASM_ADDRESS_SPACE_OBJECT; +} + +// Returns the index of the WebAssembly local to which the stack object +// FrameIndex in MF should be allocated, or None. +Optional getLocalForStackObject(MachineFunction &MF, int FrameIndex); + bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI); bool mayThrow(const MachineInstr &MI); diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp @@ -13,8 +13,12 @@ #include "WebAssemblyUtilities.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/IR/Instructions.h" #include "llvm/MC/MCContext.h" using namespace llvm; @@ -24,6 +28,48 @@ const char *const WebAssembly::PersonalityWrapperFn = "_Unwind_Wasm_CallPersonality"; +// In an ideal world, when objects are added to the MachineFrameInfo by +// FunctionLoweringInfo::set, we could somehow hook into target-specific code to +// ensure they are assigned the right stack ID. 
However there isn't a hook that +// runs between then and DAG building time, so instead we hoist stack +// objects lazily when they are first used, and comprehensively after the DAG is +// built via the PreprocessISelDAG hook, called by +// SelectionDAGISel::runOnMachineFunction. +Optional WebAssembly::getLocalForStackObject(MachineFunction &MF, + int FrameIndex) { + auto &MFI = MF.getFrameInfo(); + + // If already hoisted to a local, done. + if (MFI.getStackID(FrameIndex) == TargetStackID::Object) + return static_cast(MFI.getObjectOffset(FrameIndex)); + + // If not allocated in the object address space, this object will be in + // linear memory. + const AllocaInst *AI = MFI.getObjectAllocation(FrameIndex); + if (!AI || !isObjectAddressSpace(AI->getType()->getAddressSpace())) + return None; + + // Otherwise, allocate this object in the named value stack, outside of linear + // memory. + SmallVector ValueVTs; + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget().getTargetLowering(); + WebAssemblyFunctionInfo *FuncInfo = MF.getInfo(); + ComputeValueVTs(TLI, MF.getDataLayout(), AI->getAllocatedType(), ValueVTs); + MFI.setStackID(FrameIndex, TargetStackID::Object); + // Abuse SP offset to record the index of the first local in the object. + unsigned Local = FuncInfo->getParams().size() + FuncInfo->getLocals().size(); + MFI.setObjectOffset(FrameIndex, Local); + // Allocate WebAssembly locals for each non-aggregate component of the + // allocation. + for (EVT ValueVT : ValueVTs) + FuncInfo->addLocal(ValueVT.getSimpleVT()); + // Abuse object size to record number of WebAssembly locals allocated to + // this object. + MFI.setObjectSize(FrameIndex, ValueVTs.size()); + return static_cast(Local); +} + /// Test whether MI is a child of some other node in an expression tree. 
bool WebAssembly::isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -239,8 +239,10 @@ Changed = true; } - // Start assigning local numbers after the last parameter. + // Start assigning local numbers after the last parameter and after any + // already-assigned locals. unsigned CurLocal = static_cast(MFI.getParams().size()); + CurLocal += static_cast(MFI.getLocals().size()); // Precompute the set of registers that are unused, so that we can insert // drops to their defs. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -1182,6 +1182,8 @@ const auto *Load = cast(I); if (Load->isAtomic()) return false; + if (Load->getPointerAddressSpace() != WebAssembly::WASM_ADDRESS_SPACE_DEFAULT) + return false; if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy()) return false; @@ -1240,6 +1242,9 @@ const auto *Store = cast(I); if (Store->isAtomic()) return false; + if (Store->getPointerAddressSpace() != + WebAssembly::WASM_ADDRESS_SPACE_DEFAULT) + return false; if (!Subtarget->hasSIMD128() && Store->getValueOperand()->getType()->isVectorTy()) return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -43,6 +43,7 @@ bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; + bool isSupportedStackID(TargetStackID::Value ID) const override; 
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override; bool needsPrologForEH(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -314,6 +314,16 @@ writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL); } +bool WebAssemblyFrameLowering::isSupportedStackID( + TargetStackID::Value ID) const { + // Use the Object stack for WebAssembly locals which can only be accessed + // by name, not via an address in linear memory. + if (ID == TargetStackID::Object) + return true; + + return TargetFrameLowering::isSupportedStackID(ID); +} + TargetFrameLowering::DwarfFrameBase WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { DwarfFrameBase Loc; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -17,6 +17,8 @@ HANDLE_NODETYPE(RET_CALL) HANDLE_NODETYPE(RETURN) HANDLE_NODETYPE(ARGUMENT) +HANDLE_NODETYPE(LOCAL_GET) +HANDLE_NODETYPE(LOCAL_SET) // A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol HANDLE_NODETYPE(Wrapper) // A special wapper used in PIC code for __memory_base/__table_base relative diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "Utils/WebAssemblyUtilities.h" #include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" +#include 
"llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" // To access function attributes. @@ -56,6 +58,8 @@ return SelectionDAGISel::runOnMachineFunction(MF); } + void PreprocessISelDAG() override; + void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, @@ -69,6 +73,18 @@ }; } // end anonymous namespace +void WebAssemblyDAGToDAGISel::PreprocessISelDAG() { + // Stack objects that should be allocated to locals are hoisted to WebAssembly + // locals when they are first used. However for those without uses, we hoist + // them here. It would be nice if there were some hook to do this when they + // are added to the MachineFrameInfo, but that's not the case right now. + MachineFrameInfo &FrameInfo = MF->getFrameInfo(); + for (int Idx = 0; Idx < FrameInfo.getObjectIndexEnd(); Idx++) + WebAssembly::getLocalForStackObject(*MF, Idx); + + SelectionDAGISel::PreprocessISelDAG(); +} + void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -45,14 +45,6 @@ WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI); - enum WasmAddressSpace { - // WebAssembly uses the following address spaces: - // AS 0 : is the default address space for values in linear memory - DEFAULT = 0, - // AS 1 : is a non-integral address space for global variables - GLOBAL = 1, - }; - private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -17,7 +17,6 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -1277,12 +1276,20 @@ static bool IsWebAssemblyGlobal(SDValue Op) { if (const GlobalAddressSDNode *GA = dyn_cast(Op)) - return GA->getAddressSpace() == - WebAssemblyTargetLowering::WasmAddressSpace::GLOBAL; + return WebAssembly::isObjectAddressSpace(GA->getAddressSpace()); return false; } +static Optional IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) { + const FrameIndexSDNode *FI = dyn_cast(Op); + if (!FI) + return None; + + auto &MF = DAG.getMachineFunction(); + return WebAssembly::getLocalForStackObject(MF, FI->getIndex()); +} + SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -1302,6 +1309,17 @@ SN->getMemoryVT(), SN->getMemOperand()); } + if (Optional Local = IsWebAssemblyLocal(Base, DAG)) { + if (!Offset->isUndef()) + report_fatal_error("unexpected offset when storing to webassembly local", + false); + + SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32); + SDVTList Tys = DAG.getVTList(MVT::Other); // The chain. 
+ SDValue Ops[] = {SN->getChain(), Idx, Value}; + return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops); + } + return Op; } @@ -1324,6 +1342,20 @@ return DAG.getMergeValues({GlobalGet, LN->getChain()}, DL); } + if (Optional Local = IsWebAssemblyLocal(Base, DAG)) { + if (!Offset->isUndef()) + report_fatal_error( + "unexpected offset when loading from webassembly local", false); + + SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32); + EVT LocalVT = LN->getValueType(0); + SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT, + {LN->getChain(), Idx}); + SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL); + assert(Result->getNumValues() == 2 && "Loads must carry a chain!"); + return Result; + } + return Op; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -72,6 +72,8 @@ SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; +def SDT_WebAssemblyLocalGet : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; +def SDT_WebAssemblyLocalSet : SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>; def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -114,6 +116,12 @@ def WebAssemblyglobal_set : SDNode<"WebAssemblyISD::GLOBAL_SET", SDT_WebAssemblyGlobalSet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def WebAssemblylocal_get : + SDNode<"WebAssemblyISD::LOCAL_GET", SDT_WebAssemblyLocalGet, + [SDNPHasChain, SDNPMayLoad]>; +def WebAssemblylocal_set : + SDNode<"WebAssemblyISD::LOCAL_SET", SDT_WebAssemblyLocalSet, + [SDNPHasChain, SDNPMayStore]>; //===----------------------------------------------------------------------===// // 
WebAssembly-specific Operands. @@ -332,6 +340,10 @@ def : Pat<(WebAssemblyglobal_set vt:$src, (WebAssemblywrapper tglobaladdr:$addr)), (!cast("GLOBAL_SET_" # reg) tglobaladdr:$addr, vt:$src)>; + def : Pat<(vt (WebAssemblylocal_get (i32 timm:$local))), + (!cast("LOCAL_GET_" # reg) timm:$local)>; + def : Pat<(WebAssemblylocal_set timm:$local, vt:$src), + (!cast("LOCAL_SET_" # reg) timm:$local, reg:$src)>; } } defm "" : LOCAL; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -120,8 +120,8 @@ Optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, TT.isArch64Bit() - ? "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1" - : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1", + ? "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1-A0:u" + : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1-A0:u", TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT), getEffectiveCodeModel(CM, CodeModel::Large), OL), TLOF(new WebAssemblyTargetObjectFile()) { diff --git a/llvm/test/CodeGen/WebAssembly/ir-locals.ll b/llvm/test/CodeGen/WebAssembly/ir-locals.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/ir-locals.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false | FileCheck %s + +%i32_cell = type i32 addrspace(1)* +%i64_cell = type i64 addrspace(1)* +%f32_cell = type float addrspace(1)* +%f64_cell = type double addrspace(1)* + +define i32 @ir_local_i32(i32 %arg) { + ; CHECK-LABEL: ir_local_i32: + ; CHECK-NEXT: .functype ir_local_i32 (i32) -> (i32) + %retval = alloca i32, addrspace(1) + ; CHECK-NEXT: .local i32 + store i32 %arg, %i32_cell %retval + ; CHECK-NEXT: local.get 0 + ; CHECK-NEXT: local.set 1 + %reloaded = load i32, %i32_cell %retval + ; The DAG combiner infers that %reloaded is the same as %arg and + ; ultimately causes "local.get 0" to be emitted 
instead of + ; "local.get 1". + ; CHECK-NEXT: local.get 0 + ret i32 %reloaded + ; CHECK-NEXT: end_function +} + +define i64 @ir_local_i64(i64 %arg) { + ; CHECK-LABEL: ir_local_i64: + ; CHECK-NEXT: .functype ir_local_i64 (i64) -> (i64) + %retval = alloca i64, addrspace(1) + ; CHECK-NEXT: .local i64 + store i64 %arg, %i64_cell %retval + ; CHECK-NEXT: local.get 0 + ; CHECK-NEXT: local.set 1 + %reloaded = load i64, %i64_cell %retval + ; See note in ir_local_i32. + ; CHECK-NEXT: local.get 0 + ret i64 %reloaded + ; CHECK-NEXT: end_function +} + +define float @ir_local_f32(float %arg) { + ; CHECK-LABEL: ir_local_f32: + ; CHECK-NEXT: .functype ir_local_f32 (f32) -> (f32) + %retval = alloca float, addrspace(1) + ; CHECK-NEXT: .local f32 + ; CHECK-NEXT: local.get 0 + ; CHECK-NEXT: local.set 1 + store float %arg, %f32_cell %retval + ; See note in ir_local_i32. + ; CHECK-NEXT: local.get 0 + %reloaded = load float, %f32_cell %retval + ; CHECK-NEXT: end_function + ret float %reloaded +} + +define double @ir_local_f64(double %arg) { + ; CHECK-LABEL: ir_local_f64: + ; CHECK-NEXT: .functype ir_local_f64 (f64) -> (f64) + %retval = alloca double, addrspace(1) + ; CHECK-NEXT: .local f64 + ; CHECK-NEXT: local.get 0 + ; CHECK-NEXT: local.set 1 + store double %arg, %f64_cell %retval + ; CHECK-NEXT: local.get 0 + %reloaded = load double, %f64_cell %retval + ; CHECK-NEXT: end_function + ret double %reloaded +} + +define void @ir_unreferenced_local() { + ; CHECK-LABEL: ir_unreferenced_local: + ; CHECK-NEXT: .functype ir_unreferenced_local () -> () + %unused = alloca i32, addrspace(1) + ; CHECK-NEXT: .local i32 + ret void + ; CHECK-NEXT: end_function +}