Index: lib/Target/WebAssembly/CMakeLists.txt =================================================================== --- lib/Target/WebAssembly/CMakeLists.txt +++ lib/Target/WebAssembly/CMakeLists.txt @@ -29,6 +29,7 @@ WebAssemblyRegColoring.cpp WebAssemblyRegNumbering.cpp WebAssemblyRegStackify.cpp + WebAssemblyReplacePhysRegs.cpp WebAssemblySelectionDAGInfo.cpp WebAssemblySetP2AlignOperands.cpp WebAssemblyStoreResults.cpp Index: lib/Target/WebAssembly/WebAssembly.h =================================================================== --- lib/Target/WebAssembly/WebAssembly.h +++ lib/Target/WebAssembly/WebAssembly.h @@ -30,10 +30,11 @@ FunctionPass *createWebAssemblyArgumentMove(); FunctionPass *createWebAssemblySetP2AlignOperands(); +FunctionPass *createWebAssemblyPEI(); +FunctionPass *createWebAssemblyReplacePhysRegs(); FunctionPass *createWebAssemblyStoreResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); -FunctionPass *createWebAssemblyPEI(); FunctionPass *createWebAssemblyFixIrreducibleControlFlow(); FunctionPass *createWebAssemblyCFGStackify(); FunctionPass *createWebAssemblyLowerBrUnless(); Index: lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -93,10 +93,7 @@ //===----------------------------------------------------------------------===// MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { - const TargetRegisterClass *TRC = - TargetRegisterInfo::isVirtualRegister(RegNo) - ? 
MRI->getRegClass(RegNo) - : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) if (TRC->hasType(T)) return T; @@ -183,13 +180,6 @@ LocalTypes.push_back(getRegType(VReg)); AnyWARegs = true; } - auto &PhysRegs = MFI->getPhysRegs(); - for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { - if (PhysRegs[PReg] == -1U) - continue; - LocalTypes.push_back(getRegType(PReg)); - AnyWARegs = true; - } if (AnyWARegs) getTargetStreamer()->emitLocal(LocalTypes); Index: lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -82,8 +82,11 @@ MachineBasicBlock::iterator &InsertStore, DebugLoc DL) { auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); - unsigned SPAddr = - MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned SPAddr = MRI.createVirtualRegister(PtrRC); + unsigned Discard = MRI.createVirtualRegister(PtrRC); const auto *TII = MF.getSubtarget().getInstrInfo(); BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr) @@ -91,13 +94,12 @@ auto *MMO = new MachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, 4, 4); BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32), - SrcReg) + Discard) .addImm(0) .addReg(SPAddr) .addImm(2) // p2align .addReg(SrcReg) .addMemOperand(MMO); - MF.getInfo()->stackifyVReg(SPAddr); } MachineBasicBlock::iterator @@ -121,7 +123,6 @@ auto *MFI = MF.getFrameInfo(); assert(MFI->getCalleeSavedInfo().empty() && "WebAssembly should not have callee-saved registers"); - auto *WFI = MF.getInfo(); if (!needsSP(MF, *MFI)) return; uint64_t StackSize = 
MFI->getStackSize(); @@ -132,8 +133,10 @@ auto InsertPt = MBB.begin(); DebugLoc DL; - unsigned SPAddr = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned SPAddr = MRI.createVirtualRegister(PtrRC); + unsigned SPReg = MRI.createVirtualRegister(PtrRC); auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr) .addExternalSymbol(SPSymbol); @@ -150,25 +153,22 @@ .addReg(SPAddr) // addr .addImm(2) // p2align .addMemOperand(LoadMMO); - WFI->stackifyVReg(SPAddr); if (StackSize) { // Subtract the frame size - unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), WebAssembly::SP32) .addReg(SPReg) .addReg(OffsetReg); - WFI->stackifyVReg(OffsetReg); - WFI->stackifyVReg(SPReg); } if (hasFP(MF)) { // Unlike most conventional targets (where FP points to the saved FP), // FP points to the bottom of the fixed-size locals, so we can use positive // offsets in load/store instructions. 
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY_LOCAL_I32), + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32) .addReg(WebAssembly::SP32); } @@ -182,7 +182,6 @@ auto *MFI = MF.getFrameInfo(); uint64_t StackSize = MFI->getStackSize(); if (!needsSP(MF, *MFI) || !needsSPWriteback(MF, *MFI)) return; - auto *WFI = MF.getInfo(); const auto *TII = MF.getSubtarget().getInstrInfo(); auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.getFirstTerminator(); @@ -197,18 +196,18 @@ unsigned SPReg = 0; MachineBasicBlock::iterator InsertAddr = InsertPt; if (StackSize) { - unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); InsertAddr = BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); // In the epilog we don't need to write the result back to the SP32 physreg // because it won't be used again. We can use a stackified register instead. - SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + SPReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) .addReg(OffsetReg); - WFI->stackifyVReg(OffsetReg); - WFI->stackifyVReg(SPReg); } else { SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32; } Index: lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h =================================================================== --- lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -39,18 +39,13 @@ /// - defined and used in LIFO order with other stack registers BitVector VRegStackified; - // One entry for each possible target reg. we expect it to be small. 
- std::vector<unsigned> PhysRegs; - // A virtual register holding the pointer to the vararg buffer for vararg // functions. It is created and set in TLI::LowerFormalArguments and read by // TLI::LowerVASTART unsigned VarargVreg = -1U; public: - explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) { - PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U); - } + explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} ~WebAssemblyFunctionInfo() override; void addParam(MVT VT) { Params.push_back(VT); } @@ -87,11 +82,9 @@ WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg; } unsigned getWAReg(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size()); - return WARegs[TargetRegisterInfo::virtReg2Index(Reg)]; - } - return PhysRegs[Reg]; + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size()); + return WARegs[TargetRegisterInfo::virtReg2Index(Reg)]; } // If new virtual registers are created after initWARegs has been called, // this function can be used to add WebAssembly register mappings for them. 
@@ -99,13 +92,6 @@ assert(VReg = WARegs.size()); WARegs.push_back(WAReg); } - - void addPReg(unsigned PReg, unsigned WAReg) { - assert(PReg < WebAssembly::NUM_TARGET_REGS); - assert(WAReg < -1U); - PhysRegs[PReg] = WAReg; - } - const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; } }; } // end namespace llvm Index: lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -61,7 +61,6 @@ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const MachineFrameInfo &FrameInfo = *MF.getFrameInfo(); MFI.initWARegs(); @@ -73,11 +72,16 @@ case WebAssembly::ARGUMENT_I32: case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_F64: { + int64_t Imm = MI.getOperand(1).getImm(); + // Phys-reg livein vregs don't need special numbering. 
+ if (Imm < 0) + continue; DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg " - << MI.getOperand(1).getImm() << "\n"); - MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm()); + << Imm << "\n"); + MFI.setWAReg(MI.getOperand(0).getReg(), Imm); break; + } default: break; } @@ -107,16 +111,6 @@ MFI.setWAReg(VReg, CurReg++); } } - // Allocate locals for used physical registers - bool HasFP = MF.getSubtarget().getFrameLowering()->hasFP(MF); - if (FrameInfo.getStackSize() > 0 || FrameInfo.adjustsStack() || HasFP) { - DEBUG(dbgs() << "PReg SP " << CurReg << "\n"); - MFI.addPReg(WebAssembly::SP32, CurReg++); - } - if (HasFP) { - DEBUG(dbgs() << "PReg FP " << CurReg << "\n"); - MFI.addPReg(WebAssembly::FP32, CurReg++); - } return true; } Index: lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -51,51 +51,6 @@ return Reserved; } -static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI, - const MachineOperand& Op) { - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - return TargetRegisterInfo::isVirtualRegister(Reg) && - WFI->isVRegStackified(Reg); - } - return false; -} - -static bool canStackifyOperand(const MachineInstr& Inst) { - unsigned Op = Inst.getOpcode(); - return Op != TargetOpcode::PHI && - Op != TargetOpcode::INLINEASM && - Op != TargetOpcode::DBG_VALUE; -} - -// Determine if the FI sequence can be stackified, and if so, where the code can -// be inserted. If stackification is possible, returns true and ajusts II to -// point to the insertion point. 
-bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB, - unsigned OperandNum, MachineBasicBlock::iterator &II) { - if (!canStackifyOperand(*II)) return false; - - MachineBasicBlock::iterator InsertPt(II); - int StackCount = 0; - // Operands are popped in reverse order, so any operands after FIOperand - // impose a constraint - for (unsigned i = OperandNum; i < II->getNumOperands(); i++) { - if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount; - } - // Walk backwards, tracking stack depth. When it reaches 0 we have reached the - // top of the subtree. - while (StackCount) { - if (InsertPt == MBB.begin()) return false; - --InsertPt; - for (const auto &def : InsertPt->defs()) - if (isStackifiedVReg(WFI, def)) --StackCount; - for (const auto &use : InsertPt->explicit_uses()) - if (isStackifiedVReg(WFI, use)) ++StackCount; - } - II = InsertPt; - return true; -} - void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger * /*RS*/) const { @@ -108,52 +63,59 @@ const MachineFrameInfo &MFI = *MF.getFrameInfo(); int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + // If this is the address operand of a load or store, make it relative to SP + // and fold the frame offset directly in. if (MI.mayLoadOrStore() && FIOperandNum == WebAssembly::MemOpAddressOperandNo) { - // If this is the address operand of a load or store, make it relative to SP - // and fold the frame offset directly in. assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0); int64_t Offset = MI.getOperand(1).getImm() + FrameOffset; - if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) { - // If this happens the program is invalid, but better to error here than - // generate broken code. 
- report_fatal_error("Memory offset field overflow"); + if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) { + MI.getOperand(FIOperandNum - 1).setImm(Offset); + MI.getOperand(FIOperandNum) + .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + return; } - MI.getOperand(FIOperandNum - 1).setImm(Offset); - MI.getOperand(FIOperandNum) - .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); - } else { - // Otherwise calculate the address - auto &MRI = MF.getRegInfo(); - const auto *TII = MF.getSubtarget().getInstrInfo(); - - unsigned FIRegOperand = WebAssembly::SP32; - if (FrameOffset) { - // Create i32.add SP, offset and make it the operand. We want to stackify - // this sequence, but we need to preserve the LIFO expr stack ordering - // (i.e. we can't insert our code in between MI and any operands it - // pops before FIOperand). - auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>(); - bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II); - - unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), - OffsetOp) - .addImm(FrameOffset); - if (CanStackifyFI) { - WFI->stackifyVReg(OffsetOp); - FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - WFI->stackifyVReg(FIRegOperand); - } else { - FIRegOperand = OffsetOp; + } + + // If this is an address being added to a constant, fold the frame offset + // into the constant. 
+ if (MI.getOpcode() == WebAssembly::ADD_I32) { + MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); + if (OtherMO.isReg()) { + unsigned OtherMOReg = OtherMO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) { + MachineInstr *Def = MF.getRegInfo().getVRegDef(OtherMOReg); + if (Def->getOpcode() == WebAssembly::CONST_I32) { + MachineOperand &ImmMO = Def->getOperand(1); + ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); + MI.getOperand(FIOperandNum) + .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + return; + } } - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), - FIRegOperand) - .addReg(WebAssembly::SP32) - .addReg(OffsetOp); } - MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); } + + // Otherwise create an i32.add SP, offset and make it the operand. + auto &MRI = MF.getRegInfo(); + const auto *TII = MF.getSubtarget().getInstrInfo(); + + unsigned FIRegOperand = WebAssembly::SP32; + if (FrameOffset) { + // Create i32.add SP, offset and make it the operand. 
+ const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned OffsetOp = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), + OffsetOp) + .addImm(FrameOffset); + FIRegOperand = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), + FIRegOperand) + .addReg(WebAssembly::SP32) + .addReg(OffsetOp); + } + MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); } unsigned Index: lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -0,0 +1,164 @@ +//===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a pass that replaces physical registers with +/// virtual registers. +/// +/// LLVM expects certain physical registers, such as a stack pointer. However, +/// WebAssembly doesn't actually have such physical registers. This pass is run +/// once LLVM no longer needs these registers, and replaces them with virtual +/// registers, so they can participate in register stackifying and coloring in +/// the normal way. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-replace-phys-regs" + +namespace { +class WebAssemblyReplacePhysRegs final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Replace Physical Registers"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: +}; +} // end anonymous namespace + +char WebAssemblyReplacePhysRegs::ID = 0; +FunctionPass *llvm::createWebAssemblyReplacePhysRegs() { + return new WebAssemblyReplacePhysRegs(); +} + +/// Get the appropriate argument opcode for the given register class. 
+static unsigned GetArgumentOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::ARGUMENT_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::ARGUMENT_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::ARGUMENT_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::ARGUMENT_F64; + llvm_unreachable("Unexpected register class"); +} + +bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Replace Physical Registers **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TRI = *MF.getSubtarget().getRegisterInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + bool Changed = false; + + assert(MRI.isSSA() && "ReplacePhysRegs depends on SSA form"); + + // Create a MachineSSAUpdater instance for each physical register we need + // to replace, and initialize it. + std::unique_ptr<MachineSSAUpdater> SSA[WebAssembly::NUM_TARGET_REGS]; + for (unsigned PReg = WebAssembly::NoRegister + 1; + PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { + if (PReg == WebAssembly::EXPR_STACK || PReg == WebAssembly::ARGUMENTS) + continue; + + bool isExplicitlyUsed = false; + for (auto R = MRI.reg_begin(PReg); R != MRI.reg_end(); ++R) { + MachineOperand &MO = *R; + if (!MO.isImplicit()) { + isExplicitlyUsed = true; + break; + } + } + if (!isExplicitlyUsed) + continue; + + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg); + + // Create an initial def to represent the live-in definition. 
+ unsigned VReg = MRI.createVirtualRegister(RC); + BuildMI(*MF.begin(), MF.begin()->begin(), DebugLoc(), + TII.get(GetArgumentOpcode(RC)), VReg) + .addImm(-int64_t(PReg)); + + SSA[PReg].reset(new MachineSSAUpdater(MF)); + SSA[PReg]->Initialize(VReg); + Changed = true; + } + + // Scan the function for all explicit uses and defs of physical registers, + // to populate the SSAUpdater instances. + for (auto &MBB : MF) { + // Keep track of the current definition within the block, and perform + // these local replacements manually, since SSAUpdater doesn't support them. + unsigned CurDef[WebAssembly::NUM_TARGET_REGS] = {}; + for (auto &MI : MBB) { + for (MachineOperand &MO : reverse(MI.explicit_operands())) { + if (!MO.isReg()) + continue; + unsigned PReg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(PReg)) + continue; + + if (MO.isDef()) { + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg); + unsigned VReg = MRI.createVirtualRegister(RC); + MO.setReg(VReg); + SSA[PReg]->AddAvailableValue(&MBB, VReg); + CurDef[PReg] = VReg; + } else { + unsigned VReg = CurDef[PReg]; + if (VReg != 0) + MO.setReg(VReg); + } + } + } + } + + // Now all that's left to do are update the uses of non-local defs. The + // SSAUpdater instances handle all the tricky parts. 
+ for (unsigned PReg = WebAssembly::NoRegister + 1; + PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { + if (!SSA[PReg]) + continue; + for (auto Use = MRI.use_begin(PReg); Use != MRI.use_end(); ) { + MachineOperand &MO = *Use++; + if (!MO.isImplicit()) + SSA[PReg]->RewriteUse(MO); + } + } + + return Changed; +} Index: lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -169,6 +169,15 @@ void WebAssemblyPassConfig::addPreRegAlloc() { TargetPassConfig::addPreRegAlloc(); + // Run WebAssembly's version of the PrologEpilogInserter. Target-independent + // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run + // PEI early, because we don't spill, so we don't need to run it late. + addPass(createWebAssemblyPEI()); + + // Now that we have a prologue and epilogue and all frame indices are + // rewritten, eliminate SP and FP so that we can stackify and color them. + addPass(createWebAssemblyReplacePhysRegs()); + // Prepare store instructions for register stackifying. if (getOptLevel() != CodeGenOpt::None) addPass(createWebAssemblyStoreResults()); @@ -202,11 +211,6 @@ } TargetPassConfig::addPostRegAlloc(); - - // Run WebAssembly's version of the PrologEpilogInserter. Target-independent - // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run - // PEI before ShrinkWrap but otherwise in the same position in the order. 
- addPass(createWebAssemblyPEI()); } void WebAssemblyPassConfig::addPreEmitPass() { Index: test/CodeGen/WebAssembly/byval.ll =================================================================== --- test/CodeGen/WebAssembly/byval.ll +++ test/CodeGen/WebAssembly/byval.ll @@ -23,21 +23,21 @@ ; CHECK-LABEL: byval_arg define void @byval_arg(%SmallStruct* %ptr) { ; CHECK: .param i32 + ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer ; Subtract 16 from SP (SP is 16-byte aligned) ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L2]], $pop[[L3]] ; Ensure SP is stored back before the call - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]] + ; CHECK-NEXT: i32.store [[SP:.+]]=, 0($pop[[L4]]), $pop[[L10]]{{$}} ; Copy the SmallStruct argument to the stack (SP+12, original SP-4) ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0) - ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]] + ; CHECK-NEXT: i32.store $discard=, 12([[SP]]), $pop[[L4]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12{{$}} + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]]{{$}} + ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]]{{$}} call void @ext_byval_func(%SmallStruct* byval %ptr) ; Restore the stack ; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer @@ -53,14 +53,15 @@ ; CHECK: .param i32 ; Don't check the entire SP sequence, just enough to get the alignment. 
; CHECK: i32.const $push[[L1:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop{{.+}}), $pop[[L10]]{{$}} ; Copy the SmallStruct argument to the stack (SP+8, original SP-8) - ; CHECK: i32.load $push[[L4:.+]]=, 0($0):p2align=3 - ; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]):p2align=3, $pop[[L4]] + ; CHECK: i32.load $push[[L0:.+]]=, 0($0):p2align=3 + ; CHECK-NEXT: i32.store $discard=, 8($[[SP]]):p2align=3, $pop[[L0]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8 - ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8{{$}} + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, $[[SP]], $pop[[L5]]{{$}} + ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]]{{$}} call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr) ret void } @@ -70,13 +71,14 @@ ; CHECK: .param i32 ; Subtract 16 from SP (SP is 16-byte aligned) ; CHECK: i32.const $push[[L1:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, {{.+}}, $pop[[L12]] ; Copy the AlignedStruct argument to the stack (SP+0, original SP-16) ; Just check the last load/store pair of the memcpy ; CHECK: i64.load $push[[L4:.+]]=, 0($0) - ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L4]] + ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L4]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: call ext_byval_func_alignedstruct@FUNCTION, [[SP]] + ; CHECK-NEXT: call ext_byval_func_alignedstruct@FUNCTION, $[[SP]] tail call void @ext_byval_func_alignedstruct(%AlignedStruct* byval %ptr) ret void } @@ -108,11 +110,15 @@ ; Call memcpy for "big" byvals. 
; CHECK-LABEL: big_byval: +; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 131072 -; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] -; CHECK: i32.call ${{[^,]+}}=, memcpy@FUNCTION, +; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]] +; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}} +; CHECK-NEXT: i32.const $push[[L0]]=, 131072 +; CHECK-NEXT: i32.call $push[[L10:.+]]=, memcpy@FUNCTION, +; CHECK-NEXT: tee_local $push[[L9:.+]], $[[SP]]=, $pop[[L10:.+]]{{$}} ; CHECK-NEXT: call big_byval_callee@FUNCTION, %big = type [131072 x i8] declare void @big_byval_callee(%big* byval align 1) Index: test/CodeGen/WebAssembly/mem-intrinsics.ll =================================================================== --- test/CodeGen/WebAssembly/mem-intrinsics.ll +++ test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -61,8 +61,8 @@ ; CHECK-LABEL: frame_index: -; CHECK: i32.call $discard=, memset@FUNCTION, $pop12, $pop1, $pop0{{$}} -; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop3, $pop2{{$}} +; CHECK: i32.call $discard=, memset@FUNCTION, $pop{{[0-9]+}}, $pop1, $pop0{{$}} +; CHECK: i32.call $push{{[0-9]+}}=, memset@FUNCTION, ${{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} ; CHECK: return{{$}} define void @frame_index() { entry: Index: test/CodeGen/WebAssembly/store-results.ll =================================================================== --- test/CodeGen/WebAssembly/store-results.ll +++ test/CodeGen/WebAssembly/store-results.ll @@ -61,7 +61,8 @@ } ; CHECK-LABEL: fi_ret: -; CHECK: i32.store $discard=, +; CHECK: i32.store $push0=, +; CHECK: return $pop0{{$}} define hidden i8* @fi_ret(i8** %addr) { entry: %buf = alloca [27 x i8], align 16 Index: test/CodeGen/WebAssembly/userstack.ll =================================================================== --- test/CodeGen/WebAssembly/userstack.ll +++ 
test/CodeGen/WebAssembly/userstack.ll @@ -12,19 +12,20 @@ ; Check that there is an extra local for the stack pointer. ; CHECK: .local i32{{$}} define void @alloca32() noredzone { + ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer{{$}} ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] + ; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}} + ; CHECK-NEXT: tee_local $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L10]]{{$}} %retval = alloca i32 ; CHECK: i32.const $push[[L0:.+]]=, 0 - ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]] + ; CHECK: i32.store {{.*}}=, 12($pop[[L9]]), $pop[[L0]] store i32 0, i32* %retval ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]] ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -32,17 +33,18 @@ ; CHECK-LABEL: alloca3264: ; CHECK: .local i32{{$}} define void @alloca3264() { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK: i32.const $push[[L2:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L2]]) + ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16 + ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]] + ; CHECK-NEXT: tee_local $push[[L5:.+]]=, $[[SP:.+]]=, $pop[[L6]] %r1 = alloca i32 %r2 = alloca double - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 0 - ; CHECK-NEXT: i32.store {{.*}}=, 
12([[SP]]), $pop[[L3]] + ; CHECK-NEXT: i32.const $push[[L0:.+]]=, 0 + ; CHECK-NEXT: i32.store $discard=, 12($pop[[L5]]), $pop[[L0]] store i32 0, i32* %r1 - ; CHECK-NEXT: i64.const $push[[L0:.+]]=, 0 - ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L0]] + ; CHECK-NEXT: i64.const $push[[L1:.+]]=, 0 + ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L1]] store double 0.0, double* %r2 ; CHECK-NEXT: return ret void @@ -51,52 +53,52 @@ ; CHECK-LABEL: allocarray: ; CHECK: .local i32{{$}} define void @allocarray() { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 144{{$}} - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] + ; CHECK: i32.const $push[[L7:.+]]=, __stack_pointer + ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L5:.+]]=, 0($pop[[L4]]) + ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 144{{$}} + ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L5]], $pop[[L6]] + ; CHECK-NEXT: i32.store $push[[L13:.+]]=, 0($pop[[L7]]), $pop[[L11]] + ; CHECK-NEXT: tee_local $push[[L12:.+]]=, $[[SP:.+]]=, $pop[[L13]] %r = alloca [33 x i32] - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]] - ; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}} - ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}} + ; CHECK-NEXT: i32.const $push[[L2:.+]]=, 24 + ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $pop[[L12]], $pop[[L2]] + ; CHECK-NEXT: i32.const $push[[L0:.+]]=, 1{{$}} + ; CHECK-NEXT: i32.store $push[[L1:.+]]=, 12($[[SP]]), $pop[[L0]]{{$}} + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L3]]), 
$pop[[L1]]{{$}} %p = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 0 store i32 1, i32* %p %p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 - ; CHECK: i32.const $push[[L12:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144 - ; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]] - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]] + ; CHECK: i32.const $push[[L10:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L8:.+]]=, 144 + ; CHECK-NEXT: i32.add $push[[L9:.+]]=, $[[SP]], $pop[[L8]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L10]]), $pop[[L9]] ret void } ; CHECK-LABEL: non_mem_use define void @non_mem_use(i8** %addr) { ; CHECK: i32.const $push[[L1:.+]]=, 48 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, {{.+}}, $pop[[L11]] %buf = alloca [27 x i8], align 16 %r = alloca i64 %r2 = alloca i64 ; %r is at SP+8 ; CHECK: i32.const $push[[OFF:.+]]=, 8 - ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $[[SP]], $pop[[OFF]] ; CHECK-NEXT: call ext_func@FUNCTION, $pop[[ARG1]] call void @ext_func(i64* %r) ; %r2 is at SP+0, no add needed - ; CHECK-NEXT: call ext_func@FUNCTION, [[SP]] + ; CHECK-NEXT: call ext_func@FUNCTION, $[[SP]] call void @ext_func(i64* %r2) ; Use as a value, but in a store ; %buf is at SP+16 ; CHECK: i32.const $push[[OFF:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $[[SP]], $pop[[OFF]] ; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]] %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0 store i8* %gep, i8** %addr @@ -106,59 +108,62 @@ ; CHECK-LABEL: allocarray_inbounds: ; CHECK: .local i32{{$}} define void @allocarray_inbounds() { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 
0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 32{{$}} - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer + ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($pop[[L3]]) + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32{{$}} + ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L4]], $pop[[L5]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L6]]), $pop[[L10]]{{$}} %r = alloca [5 x i32] ; CHECK: i32.const $push[[L3:.+]]=, 1 - ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]] + ; CHECK: i32.store {{.*}}=, 12($[[SP]]), $pop[[L3]] %p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0 store i32 1, i32* %p ; This store should have both the GEP and the FI folded into it. - ; CHECK-NEXT: i32.store {{.*}}=, 24([[SP]]), $pop + ; CHECK-NEXT: i32.store {{.*}}=, 24($[[SP]]), $pop %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 call void @ext_func(i64* null); ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]] ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } ; CHECK-LABEL: dynamic_alloca: define void @dynamic_alloca(i32 %alloc) { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] + ; CHECK: i32.const $push[[L7:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $[[FP:.+]]=, 0($pop[[L7]]) + ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer{{$}} + ; CHECK-NOT: copy_local ; Target independent codegen bumps the stack pointer. 
- ; CHECK: i32.sub + ; CHECK: i32.sub $push[[L10:.+]]=, - ; CHECK-NEXT: copy_local [[SP]]=, + ; CHECK-NOT: copy_local ; Check that SP is written back to memory after decrement - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] + ; CHECK: tee_local $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L10]]{{$}} + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L9]] %r = alloca i32, i32 %alloc ; Target-independent codegen also calculates the store addr - ; CHECK: call ext_func_i32@FUNCTION + ; CHECK: call ext_func_i32@FUNCTION, $[[SP]]{{$}} call void @ext_func_i32(i32* %r) - ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L3]]), [[FP]] + ; CHECK: i32.const $push[[L8:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L8]]), $[[FP]] ret void } ; CHECK-LABEL: dynamic_alloca_redzone: define void @dynamic_alloca_redzone(i32 %alloc) { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] + ; CHECK: i32.const $push[[L8:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $[[FP:.+]]=, 0($pop[[L8]]) + ; CHECK-NOT: copy_local ; Target independent codegen bumps the stack pointer ; CHECK: i32.sub [[R:.+]]=, - ; CHECK-NEXT: copy_local [[SP]]=, [[R]] + ; CHECK-NOT: copy_local %r = alloca i32, i32 %alloc ; check-next here asserts that SP is not written back. - ; CHECK-NEXT: i32.const $push[[ZERO:.+]]=, 0 - ; CHECK-NEXT: i32.store $discard=, 0([[R]]), $pop[[ZERO]] + ; CHECK-NEXT: i32.const $push[[L7:.+]]=, 0 + ; CHECK-NEXT: i32.store $discard=, 0($pop{{.+}}), $pop[[L7]] store i32 0, i32* %r ; CHECK-NEXT: return ret void @@ -167,26 +172,26 @@ ; CHECK-LABEL: dynamic_static_alloca: define void @dynamic_static_alloca(i32 %alloc) noredzone { ; Decrement SP in the prolog by the static amount and writeback to memory. 
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] - ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]] + ; CHECK: i32.const $push[[L11:.+]]=, __stack_pointer + ; CHECK: i32.const $push[[L8:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L9:.+]]=, 0($pop[[L8]]) + ; CHECK-NEXT: i32.const $push[[L10:.+]]=, 16 + ; CHECK-NEXT: i32.sub $push[[L18:.+]]=, $pop[[L9]], $pop[[L10]] + ; CHECK-NEXT: tee_local $push[[L17:.+]]=, $[[FP:.+]]=, $pop[[L18]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L11]]), $pop[[L17]]{{$}} + ; CHECK-NOT: copy_local ; Decrement SP in the body by the dynamic amount. ; CHECK: i32.sub - ; CHECK: copy_local [[SP]]=, + ; CHECK-NOT: copy_local ; Writeback to memory. - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]] + ; CHECK: i32.store $discard=, 0($pop{{.+}}), $pop{{.+}} %r1 = alloca i32 %r = alloca i32, i32 %alloc store i32 0, i32* %r - ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]] - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] + ; CHECK: i32.const $push[[L14:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L12:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[L13:.+]]=, $[[FP]], $pop[[L12]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L14]]), $pop[[L13]] ret void } @@ -216,12 +221,12 @@ ; Test __builtin_frame_address(0). 
; CHECK-LABEL: frameaddress_0: -; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer -; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]]) -; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] -; CHECK-NEXT: call use_i8_star@FUNCTION, [[FP]] -; CHEC K-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer -; CHEC K-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[FP]] +; CHECK: i32.const $push[[L0:.+]]=, __stack_pointer +; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L0]]) +; CHECK-NEXT: tee_local $push[[L2:.+]]=, $[[FP:.+]]=, $pop[[L3]]{{$}} +; CHECK-NEXT: call use_i8_star@FUNCTION, $pop[[L2]] +; CHECK-NEXT: i32.const $push[[L1:.+]]=, __stack_pointer +; CHECK-NEXT: i32.store $discard=, 0($pop[[L1]]), $[[FP]] define void @frameaddress_0() { %t = call i8* @llvm.frameaddress(i32 0) call void @use_i8_star(i8* %t)