Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -168,7 +168,8 @@ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); + // XXX DO NOT COMMIT THIS. Wait for http://reviews.llvm.org/D15394 + //assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); @@ -216,11 +217,11 @@ // If register scavenging is needed, as we've enabled doing it as a // post-pass, scavenge the virtual registers that frame index elimination // inserted. - if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) { scavengeFrameVirtualRegs(Fn); - - // Clear any vregs created by virtual scavenging. - Fn.getRegInfo().clearVirtRegs(); + // Clear any vregs created by virtual scavenging. + Fn.getRegInfo().clearVirtRegs(); + } // Warn on stack size when we exceeds the given limit. MachineFrameInfo *MFI = Fn.getFrameInfo(); Index: lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -35,6 +35,10 @@ #define DEBUG_TYPE "wasm-frame-info" // TODO: Implement a red zone? +// TODO: wasm64 +// TODO: Prolog/epilog should be stackified too. This pass runs after register +// stackification, so we'll have to do it manually. +// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions /// Return true if the specified function should have a dedicated frame pointer /// register. 
@@ -42,9 +46,9 @@ const MachineFrameInfo *MFI = MF.getFrameInfo(); const auto *RegInfo = MF.getSubtarget().getRegisterInfo(); - return MFI->hasCalls() || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || MFI->hasStackMap() || - MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF); + return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || + MFI->hasStackMap() || MFI->hasPatchPoint() || + RegInfo->needsStackRealignment(MF); } /// Under normal circumstances, when a frame pointer is not required, we reserve @@ -63,12 +67,89 @@ llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr"); } -void WebAssemblyFrameLowering::emitPrologue(MachineFunction & /*MF*/, - MachineBasicBlock & /*MBB*/) const { - llvm_unreachable("TODO: implement emitPrologue"); +void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions + auto *MFI = MF.getFrameInfo(); + assert(MFI->getCalleeSavedInfo().empty() && + "WebAssembly should not have callee-saved registers"); + assert(!hasFP(MF) && "Functions needing frame pointers not yet supported"); + assert(!MFI->adjustsStack() && "Dynamic stack adjustmet not yet supported"); + uint64_t StackSize = MFI->getStackSize(); + if (!StackSize) + return; + + const auto *TII = MF.getSubtarget().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + auto InsertPt = MBB.begin(); + DebugLoc DL; + + // Get the current stacktop + // TODO: To support dynamic alloc, also copy to FP + unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg) + .addExternalSymbol(SPSymbol); + // This MachinePointerInfo should reference __stack_pointer as well but + // doesn't because MachinePointerInfo() takes a GV which we don't have for + // __stack_pointer. 
TODO: check if PseudoSourceValue::ExternalSymbolCallEntry + // is appropriate instead. (likewise for emitEpilogue below) + auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOLoad, 4, 4); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg) + .addImm(0) + .addReg(SPReg) + .addMemOperand(LoadMMO); + // Subtract the frame size + unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), WebAssembly::SP32) + .addReg(SPReg) + .addReg(OffsetReg); + // The SP32 register now has the new stacktop. Also write it back to memory. + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addExternalSymbol(SPSymbol); + auto *MMO = new MachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, 4, 4); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) + .addImm(0) + .addReg(OffsetReg) + .addReg(WebAssembly::SP32) + .addMemOperand(MMO); } -void WebAssemblyFrameLowering::emitEpilogue(MachineFunction & /*MF*/, - MachineBasicBlock & /*MBB*/) const { - llvm_unreachable("TODO: implement emitEpilogue"); +void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + uint64_t StackSize = MF.getFrameInfo()->getStackSize(); + if (!StackSize) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + auto InsertPt = MBB.getFirstTerminator(); + DebugLoc DL; + + if (InsertPt != MBB.end()) { + DL = InsertPt->getDebugLoc(); + } + + // Restore the stack pointer.
Without FP its value is just SP32 + stacksize + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32) + .addReg(WebAssembly::SP32) + .addReg(OffsetReg); + // Re-use OffsetReg to hold the address of the stacktop + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addExternalSymbol(SPSymbol); + auto *MMO = new MachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, 4, 4); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) + .addImm(0) + .addReg(OffsetReg) + .addReg(WebAssembly::SP32) + .addMemOperand(MMO); + } Index: lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -73,6 +73,7 @@ // Custom lowering hooks. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -167,6 +167,8 @@ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + // Expand these forms; we pattern-match the forms that we can handle in isel.
for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) for (auto Op : {ISD::BR_CC, ISD::SELECT_CC}) @@ -509,6 +511,8 @@ default: llvm_unreachable("unimplemented operation lowering"); return SDValue(); + case ISD::FrameIndex: + return LowerFrameIndex(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::ExternalSymbol: @@ -522,6 +526,12 @@ } } +SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, + SelectionDAG &DAG) const { + int FI = cast<FrameIndexSDNode>(Op)->getIndex(); + return DAG.getTargetFrameIndex(FI, Op.getValueType()); +} + SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); Index: lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h =================================================================== --- lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -38,8 +39,13 @@ /// - defined and used in LIFO order with other stack registers BitVector VRegStackified; + // One entry for each possible target reg. we expect it to be small.
+ std::vector<unsigned> PhysRegs; + public: - explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} + explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) { + PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U); + } ~WebAssemblyFunctionInfo() override; void addParam(MVT VT) { Params.push_back(VT); } @@ -64,9 +70,12 @@ assert(TargetRegisterInfo::virtReg2Index(VReg) < WARegs.size()); WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg; } - unsigned getWAReg(unsigned VReg) const { - assert(TargetRegisterInfo::virtReg2Index(VReg) < WARegs.size()); - return WARegs[TargetRegisterInfo::virtReg2Index(VReg)]; + unsigned getWAReg(unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size()); + return WARegs[TargetRegisterInfo::virtReg2Index(Reg)]; + } + return PhysRegs[Reg]; } // If new virtual registers are created after initWARegs has been called, // this function can be used to add WebAssembly register mappings for them. @@ -74,6 +83,12 @@ assert(VReg = WARegs.size()); WARegs.push_back(WAReg); } + + void addPReg(unsigned PReg, unsigned WAReg) { + assert(PReg < WebAssembly::NUM_TARGET_REGS); + assert(WAReg < -1U); + PhysRegs[PReg] = WAReg; + } }; } // end namespace llvm Index: lib/Target/WebAssembly/WebAssemblyPeephole.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -64,7 +64,9 @@ // can use $discard instead.
MachineOperand &MO = MI.getOperand(0); unsigned OldReg = MO.getReg(); - if (OldReg == MI.getOperand(3).getReg()) { + // TODO: Handle SP/physregs + if (OldReg == MI.getOperand(3).getReg() + && TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) { unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); MO.setReg(NewReg); MO.setIsDead(); Index: lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -19,6 +19,7 @@ #include "WebAssemblySubtarget.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -60,6 +61,7 @@ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo &FrameInfo = *MF.getFrameInfo(); MFI.initWARegs(); @@ -98,6 +100,9 @@ if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg) MFI.setWAReg(VReg, NumArgRegs + CurReg++); } + // Allocate locals for used physical registers + if (FrameInfo.getStackSize() > 0) + MFI.addPReg(WebAssembly::SP32, CurReg++); return true; } Index: lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -52,10 +52,37 @@ } void WebAssemblyRegisterInfo::eliminateFrameIndex( - MachineBasicBlock::iterator /*II*/, int /*SPAdj*/, - unsigned /*FIOperandNum*/, RegScavenger * /*RS*/) const { - llvm_unreachable( - "TODO: implement WebAssemblyRegisterInfo::eliminateFrameIndex"); + MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, RegScavenger * /*RS*/) const { + assert(SPAdj == 0); +
MachineInstr &MI = *II; + + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + const MachineFrameInfo& MFI = *MF.getFrameInfo(); + int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + + if (MI.mayLoadOrStore()) { + // If this is a load or store, make it relative to SP and fold the frame + // offset directly in + assert(MI.getOperand(1).getImm() == 0 && + "Can't eliminate FI yet if offset is already set"); + MI.getOperand(1).setImm(FrameOffset); + MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + } else { + // Otherwise create an i32.add SP, offset and make it the operand + auto &MRI = MF.getRegInfo(); + const auto *TII = MF.getSubtarget().getInstrInfo(); + + unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(FrameOffset); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), OffsetReg) + .addReg(WebAssembly::SP32) + .addReg(OffsetReg); + MI.getOperand(FIOperandNum).ChangeToRegister(OffsetReg, /*IsDef=*/false); + } } unsigned Index: lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -176,8 +176,6 @@ // virtual registers. Consider removing their restrictions and re-enabling // them. // - // Fails with: Regalloc must assign all vregs. - disablePass(&PrologEpilogCodeInserterID); // Fails with: should be run after register allocation. 
disablePass(&MachineCopyPropagationID); Index: test/CodeGen/WebAssembly/userstack.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/userstack.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s +; RUN: llc < %s -asm-verbose=false -fast-isel | FileCheck %s + + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; CHECK-LABEL: alloca32: +define void @alloca32() { + ; CHECK: i32.const [[L1:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]]) + ; CHECK-NEXT: i32.const [[L2:.+]]=, 16 + ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]] + %retval = alloca i32 + ; CHECK: i32.const $push[[L3:.+]]=, 0 + ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]] + store i32 0, i32* %retval + ; CHECK: i32.const [[L4:.+]]=, 16 + ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L4]] + ; CHECK-NEXT: i32.const [[L5:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.store [[SP]]=, 0([[L5]]), [[SP]] + ret void +} + +; CHECK-LABEL: alloca3264: +define void @alloca3264() { + ; CHECK: i32.const [[L1:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]]) + ; CHECK-NEXT: i32.const [[L2:.+]]=, 16 + ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]] + %r1 = alloca i32 + %r2 = alloca double + ; CHECK: i32.const $push[[L3:.+]]=, 0 + ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]] + store i32 0, i32* %r1 + ; CHECK: i64.const $push[[L4:.+]]=, 0 + ; CHECK: i64.store {{.*}}=, 0([[SP]]), $pop[[L4]] + store double 0.0, double* %r2 + ; CHECK: i32.const [[L4:.+]]=, 16 + ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L4]] + ; CHECK-NEXT: i32.const [[L5:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.store [[SP]]=, 0([[L5]]), [[SP]] + ret void +} + +define void @allocarray() { + ; CHECK: i32.const [[L1:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]]) + ; CHECK-NEXT: i32.const [[L2:.+]]=, 32 + ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]] + %r = alloca [5 x i32] + 
; CHECK: i32.const $push[[L3:.+]]=, 1 + ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]] + %p = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 0 + store i32 1, i32* %p + ; CHECK: i32.const $push[[L4:.+]]=, 4 + ; CHECK: i32.const [[L5:.+]]=, 12 + ; CHECK: i32.add [[L5]]=, [[SP]], [[L5]] + ; CHECK: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]] + ; CHECK: i32.store {{.*}}=, 0($pop[[L6]]), ${{.+}} + %p2 = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 1 + store i32 1, i32* %p2 + ; CHECK: i32.const [[L7:.+]]=, 32 + ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L7]] + ; CHECK-NEXT: i32.const [[L8:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.store [[SP]]=, 0([[L8]]), [[SP]] + ret void +} + +; TODO: test aligned alloc