Index: llvm/trunk/lib/CodeGen/BranchFolding.cpp =================================================================== --- llvm/trunk/lib/CodeGen/BranchFolding.cpp +++ llvm/trunk/lib/CodeGen/BranchFolding.cpp @@ -1845,7 +1845,7 @@ if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; unsigned Reg = MO.getReg(); - if (!Reg) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) continue; LocalDefs.push_back(Reg); addRegAndItsAliases(Reg, TRI, LocalDefsSet); Index: llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt +++ llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt @@ -22,13 +22,16 @@ WebAssemblyLowerBrUnless.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp + WebAssemblyOptimizeLiveIntervals.cpp WebAssemblyOptimizeReturned.cpp WebAssemblyPeephole.cpp WebAssemblyPEI.cpp + WebAssemblyPrepareForLiveIntervals.cpp WebAssemblyRegisterInfo.cpp WebAssemblyRegColoring.cpp WebAssemblyRegNumbering.cpp WebAssemblyRegStackify.cpp + WebAssemblyReplacePhysRegs.cpp WebAssemblySelectionDAGInfo.cpp WebAssemblySetP2AlignOperands.cpp WebAssemblyStoreResults.cpp Index: llvm/trunk/lib/Target/WebAssembly/WebAssembly.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssembly.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssembly.h @@ -23,17 +23,25 @@ class WebAssemblyTargetMachine; class FunctionPass; +// LLVM IR passes. FunctionPass *createWebAssemblyOptimizeReturned(); +// ISel and immediate followup passes. FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createWebAssemblyArgumentMove(); FunctionPass *createWebAssemblySetP2AlignOperands(); +// Regalloc-time passes. +FunctionPass *createWebAssemblyPEI(); + +// Late passes. 
+FunctionPass *createWebAssemblyReplacePhysRegs(); +FunctionPass *createWebAssemblyPrepareForLiveIntervals(); +FunctionPass *createWebAssemblyOptimizeLiveIntervals(); FunctionPass *createWebAssemblyStoreResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); -FunctionPass *createWebAssemblyPEI(); FunctionPass *createWebAssemblyFixIrreducibleControlFlow(); FunctionPass *createWebAssemblyCFGStackify(); FunctionPass *createWebAssemblyLowerBrUnless(); Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -93,10 +93,7 @@ //===----------------------------------------------------------------------===// MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { - const TargetRegisterClass *TRC = - TargetRegisterInfo::isVirtualRegister(RegNo) - ? 
MRI->getRegClass(RegNo) - : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) if (TRC->hasType(T)) return T; @@ -183,13 +180,6 @@ LocalTypes.push_back(getRegType(VReg)); AnyWARegs = true; } - auto &PhysRegs = MFI->getPhysRegs(); - for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { - if (PhysRegs[PReg] == -1U) - continue; - LocalTypes.push_back(getRegType(PReg)); - AnyWARegs = true; - } if (AnyWARegs) getTargetStreamer()->emitLocal(LocalTypes); Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -83,8 +83,11 @@ MachineBasicBlock::iterator &InsertStore, DebugLoc DL) { auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); - unsigned SPAddr = - MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned SPAddr = MRI.createVirtualRegister(PtrRC); + unsigned Discard = MRI.createVirtualRegister(PtrRC); const auto *TII = MF.getSubtarget().getInstrInfo(); BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr) @@ -92,13 +95,12 @@ auto *MMO = new MachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, 4, 4); BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32), - SrcReg) + Discard) .addImm(0) .addReg(SPAddr) .addImm(2) // p2align .addReg(SrcReg) .addMemOperand(MMO); - MF.getInfo()->stackifyVReg(SPAddr); } MachineBasicBlock::iterator @@ -122,7 +124,6 @@ auto *MFI = MF.getFrameInfo(); assert(MFI->getCalleeSavedInfo().empty() && "WebAssembly should not have callee-saved registers"); - auto *WFI = MF.getInfo(); if (!needsSP(MF, 
*MFI)) return; uint64_t StackSize = MFI->getStackSize(); @@ -133,8 +134,10 @@ auto InsertPt = MBB.begin(); DebugLoc DL; - unsigned SPAddr = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned SPAddr = MRI.createVirtualRegister(PtrRC); + unsigned SPReg = MRI.createVirtualRegister(PtrRC); auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr) .addExternalSymbol(SPSymbol); @@ -151,25 +154,22 @@ .addReg(SPAddr) // addr .addImm(2) // p2align .addMemOperand(LoadMMO); - WFI->stackifyVReg(SPAddr); if (StackSize) { // Subtract the frame size - unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), WebAssembly::SP32) .addReg(SPReg) .addReg(OffsetReg); - WFI->stackifyVReg(OffsetReg); - WFI->stackifyVReg(SPReg); } if (hasFP(MF)) { // Unlike most conventional targets (where FP points to the saved FP), // FP points to the bottom of the fixed-size locals, so we can use positive // offsets in load/store instructions. 
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY_LOCAL_I32), + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32) .addReg(WebAssembly::SP32); } @@ -183,40 +183,31 @@ auto *MFI = MF.getFrameInfo(); uint64_t StackSize = MFI->getStackSize(); if (!needsSP(MF, *MFI) || !needsSPWriteback(MF, *MFI)) return; - auto *WFI = MF.getInfo(); const auto *TII = MF.getSubtarget().getInstrInfo(); auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.getFirstTerminator(); DebugLoc DL; - if (InsertPt != MBB.end()) { + if (InsertPt != MBB.end()) DL = InsertPt->getDebugLoc(); - // If code has been stackified with the return, disconnect it so that we - // don't break the tree when we insert code just before the return. - if (InsertPt->isReturn() && InsertPt->getNumExplicitOperands() != 0) { - WebAssemblyFunctionInfo &MFI = *MF.getInfo(); - MFI.unstackifyVReg(InsertPt->getOperand(0).getReg()); - } - } - // Restore the stack pointer. If we had fixed-size locals, add the offset // subtracted in the prolog. unsigned SPReg = 0; MachineBasicBlock::iterator InsertAddr = InsertPt; if (StackSize) { - unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); InsertAddr = BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); // In the epilog we don't need to write the result back to the SP32 physreg // because it won't be used again. We can use a stackified register instead. - SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + SPReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) .addReg(OffsetReg); - WFI->stackifyVReg(OffsetReg); - WFI->stackifyVReg(SPReg); } else { SPReg = hasFP(MF) ? 
WebAssembly::FP32 : WebAssembly::SP32; } Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -39,18 +39,13 @@ /// - defined and used in LIFO order with other stack registers BitVector VRegStackified; - // One entry for each possible target reg. we expect it to be small. - std::vector PhysRegs; - // A virtual register holding the pointer to the vararg buffer for vararg // functions. It is created and set in TLI::LowerFormalArguments and read by // TLI::LowerVASTART unsigned VarargVreg = -1U; public: - explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) { - PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U); - } + explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} ~WebAssemblyFunctionInfo() override; void addParam(MVT VT) { Params.push_back(VT); } @@ -69,11 +64,6 @@ VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1); VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg)); } - void unstackifyVReg(unsigned VReg) { - if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size()) - return; - VRegStackified.reset(TargetRegisterInfo::virtReg2Index(VReg)); - } bool isVRegStackified(unsigned VReg) const { if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size()) return false; @@ -87,11 +77,8 @@ WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg; } unsigned getWAReg(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size()); - return WARegs[TargetRegisterInfo::virtReg2Index(Reg)]; - } - return PhysRegs[Reg]; + assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size()); + return WARegs[TargetRegisterInfo::virtReg2Index(Reg)]; } // If new virtual registers are created after initWARegs has been 
called, // this function can be used to add WebAssembly register mappings for them. @@ -99,13 +86,6 @@ assert(VReg = WARegs.size()); WARegs.push_back(WAReg); } - - void addPReg(unsigned PReg, unsigned WAReg) { - assert(PReg < WebAssembly::NUM_TARGET_REGS); - assert(WAReg < -1U); - PhysRegs[PReg] = WAReg; - } - const std::vector &getPhysRegs() const { return PhysRegs; } }; } // end namespace llvm Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -0,0 +1,105 @@ +//===--- WebAssemblyOptimizeLiveIntervals.cpp - LiveInterval processing ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Optimize LiveIntervals for use in a post-RA context. +/// +/// LiveIntervals normally runs before register allocation when the code is +/// only recently lowered out of SSA form, so it's uncommon for registers to +/// have multiple defs, and when they do, the defs are usually closely related. +/// Later, after coalescing, tail duplication, and other optimizations, it's +/// more common to see registers with multiple unrelated defs. This pass +/// updates LiveIntervalAnalysis to distribute the value numbers across separate +/// LiveIntervals. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-optimize-live-intervals" + +namespace { +class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Optimize Live Intervals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(LiveVariablesID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyOptimizeLiveIntervals() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyOptimizeLiveIntervals::ID = 0; +FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() { + return new WebAssemblyOptimizeLiveIntervals(); +} + +bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n" + "********** Function: " + << MF.getName() << '\n'); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + LiveIntervals &LIS = getAnalysis(); + + // We don't preserve SSA form. + MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && + "OptimizeLiveIntervals expects liveness"); + + // Split multiple-VN LiveIntervals into multiple LiveIntervals. 
+ SmallVector SplitLIs; + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI.reg_nodbg_empty(Reg)) + continue; + + LIS.splitSeparateComponents(LIS.getInterval(Reg), SplitLIs); + SplitLIs.clear(); + } + + // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF + // instructions to satisfy LiveIntervals' requirement that all uses be + // dominated by defs. Now that LiveIntervals has computed which of these + // defs are actually needed and which are dead, remove the dead ones. + for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE; ) { + MachineInstr *MI = &*MII++; + if (MI->isImplicitDef() && MI->getOperand(0).isDead()) { + LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg()); + LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*MI).getRegSlot()); + LIS.RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + } + + return false; +} Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyPEI.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InlineAsm.h" Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -0,0 +1,136 @@ +//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Fix up code to meet LiveIntervals' requirements. +/// +/// Some CodeGen passes don't preserve LiveIntervals' requirements, because +/// they run after register allocation and it isn't important. However, +/// WebAssembly runs LiveIntervals in a late pass. This pass transforms code +/// to meet LiveIntervals' requirements; primarily, it ensures that all +/// virtual register uses have definitions (IMPLICIT_DEF definitions if +/// nothing else). +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-prepare-for-live-intervals" + +namespace { +class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {} + +private: + const char *getPassName() const override { + return "WebAssembly Prepare For LiveIntervals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyPrepareForLiveIntervals::ID = 0; +FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() { + return new WebAssemblyPrepareForLiveIntervals(); +} + +/// Test whether the given instruction is an 
ARGUMENT. +static bool IsArgument(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + return true; + default: + return false; + } +} + +// Test whether the given register has an ARGUMENT def. +static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { + for (auto &Def : MRI.def_instructions(Reg)) + if (IsArgument(&Def)) + return true; + return false; +} + +bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Prepare For LiveIntervals **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock &Entry = *MF.begin(); + + assert(!mustPreserveAnalysisID(LiveIntervalsID) && + "LiveIntervals shouldn't be active yet!"); + + // We don't preserve SSA form. + MRI.leaveSSA(); + + // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF + // instructions. LiveIntervals requires that all paths to virtual register + // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to + // conservatively satisfy this. + // + // TODO: This is fairly heavy-handed; find a better approach. + // + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // Skip unused registers. + if (MRI.use_nodbg_empty(Reg)) + continue; + + // Skip registers that have an ARGUMENT definition. + if (HasArgumentDef(Reg, MRI)) + continue; + + BuildMI(Entry, Entry.begin(), DebugLoc(), + TII.get(WebAssembly::IMPLICIT_DEF), Reg); + Changed = true; + } + + // Move ARGUMENT_* instructions to the top of the entry block, so that their + // liveness reflects the fact that these really are live-in values. 
+ for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) { + MachineInstr *MI = &*MII++; + if (IsArgument(MI)) { + MI->removeFromParent(); + Entry.insert(Entry.begin(), MI); + } + } + + // Ok, we're now ready to run LiveIntervalAnalysis again. + MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); + + return Changed; +} Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -61,7 +61,6 @@ WebAssemblyFunctionInfo &MFI = *MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const MachineFrameInfo &FrameInfo = *MF.getFrameInfo(); MFI.initWARegs(); @@ -73,11 +72,13 @@ case WebAssembly::ARGUMENT_I32: case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_F64: { + int64_t Imm = MI.getOperand(1).getImm(); DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg " - << MI.getOperand(1).getImm() << "\n"); - MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm()); + << Imm << "\n"); + MFI.setWAReg(MI.getOperand(0).getReg(), Imm); break; + } default: break; } @@ -107,17 +108,6 @@ MFI.setWAReg(VReg, CurReg++); } } - // Allocate locals for used physical registers - bool HasFP = - MF.getSubtarget().getFrameLowering()->hasFP(MF); - if (FrameInfo.getStackSize() > 0 || FrameInfo.adjustsStack() || HasFP) { - DEBUG(dbgs() << "PReg SP " << CurReg << "\n"); - MFI.addPReg(WebAssembly::SP32, CurReg++); - } - if (HasFP) { - DEBUG(dbgs() << "PReg FP " << CurReg << "\n"); - MFI.addPReg(WebAssembly::FP32, CurReg++); - } return true; } Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ 
llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -110,6 +110,10 @@ continue; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions + // from moving down, and we've already checked for that. + if (Reg == WebAssembly::ARGUMENTS) + continue; // If the physical register is never modified, ignore it. if (!MRI.isPhysRegModified(Reg)) continue; @@ -118,7 +122,7 @@ } // Ask LiveIntervals whether moving this virtual register use or def to - // Insert will change value numbers are seen. + // Insert will change which value numbers are seen. const LiveInterval &LI = LIS.getInterval(Reg); VNInfo *DefVNI = MO.isDef() ? LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) @@ -141,11 +145,23 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, const MachineBasicBlock &MBB, const MachineRegisterInfo &MRI, - const MachineDominatorTree &MDT) { + const MachineDominatorTree &MDT, + LiveIntervals &LIS) { + const LiveInterval &LI = LIS.getInterval(Reg); + + const MachineInstr *OneUseInst = OneUse.getParent(); + VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst)); + for (const MachineOperand &Use : MRI.use_operands(Reg)) { if (&Use == &OneUse) continue; + const MachineInstr *UseInst = Use.getParent(); + VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst)); + + if (UseVNI != OneUseVNI) + continue; + const MachineInstr *OneUseInst = OneUse.getParent(); if (UseInst->getOpcode() == TargetOpcode::PHI) { // Test that the PHI use, which happens on the CFG edge rather than @@ -183,13 +199,33 @@ /// A single-use def in the same block with no intervening memory or register /// dependencies; move the def down and nest it with the current instruction. 
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineInstr *Def, +static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op, + MachineInstr *Def, MachineBasicBlock &MBB, MachineInstr *Insert, LiveIntervals &LIS, - WebAssemblyFunctionInfo &MFI) { + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { MBB.splice(Insert, &MBB, Def); LIS.handleMove(*Def); - MFI.stackifyVReg(Reg); + + if (MRI.hasOneDef(Reg)) { + MFI.stackifyVReg(Reg); + } else { + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + Def->getOperand(0).setReg(NewReg); + Op.setReg(NewReg); + + // Tell LiveIntervals about the new register. + LIS.createAndComputeVirtRegInterval(NewReg); + + // Tell LiveIntervals about the changes to the old register. + LiveInterval &LI = LIS.getInterval(Reg); + LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*Def).getRegSlot()); + LIS.shrinkToUses(&LI); + + MFI.stackifyVReg(NewReg); + } + ImposeStackOrdering(Def); return Def; } @@ -211,18 +247,23 @@ MFI.stackifyVReg(NewReg); ImposeStackOrdering(Clone); + // Shrink the interval. + bool IsDead = MRI.use_empty(Reg); + if (!IsDead) { + LiveInterval &LI = LIS.getInterval(Reg); + LIS.shrinkToUses(&LI); + IsDead = !LI.liveAt(LIS.getInstructionIndex(*Def).getDeadSlot()); + } + // If that was the last use of the original, delete the original. - // Otherwise shrink the LiveInterval. 
- if (MRI.use_empty(Reg)) { + if (IsDead) { SlotIndex Idx = LIS.getInstructionIndex(*Def).getRegSlot(); LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx); - LIS.removeVRegDefAt(LIS.getInterval(Reg), Idx); LIS.removeInterval(Reg); LIS.RemoveMachineInstrFromMaps(*Def); Def->eraseFromParent(); - } else { - LIS.shrinkToUses(&LIS.getInterval(Reg)); } + return Clone; } @@ -488,13 +529,13 @@ bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) && !TreeWalker.IsOnStack(Reg); if (CanMove && MRI.hasOneUse(Reg)) { - Insert = MoveForSingleUse(Reg, Def, MBB, Insert, LIS, MFI); + Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI); } else if (Def->isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA)) { Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI, TII, TRI); } else if (CanMove && - OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT)) { + OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS)) { Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI, TII); } else { @@ -536,15 +577,13 @@ SmallVector Stack; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { + if (MI.isDebugValue()) + continue; for (MachineOperand &MO : reverse(MI.explicit_operands())) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - // Don't stackify physregs like SP or FP. 
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (MFI.isVRegStackified(Reg)) { if (MO.isDef()) Stack.push_back(Reg); Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -51,51 +51,6 @@ return Reserved; } -static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI, - const MachineOperand& Op) { - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - return TargetRegisterInfo::isVirtualRegister(Reg) && - WFI->isVRegStackified(Reg); - } - return false; -} - -static bool canStackifyOperand(const MachineInstr& Inst) { - unsigned Op = Inst.getOpcode(); - return Op != TargetOpcode::PHI && - Op != TargetOpcode::INLINEASM && - Op != TargetOpcode::DBG_VALUE; -} - -// Determine if the FI sequence can be stackified, and if so, where the code can -// be inserted. If stackification is possible, returns true and ajusts II to -// point to the insertion point. -bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB, - unsigned OperandNum, MachineBasicBlock::iterator &II) { - if (!canStackifyOperand(*II)) return false; - - MachineBasicBlock::iterator InsertPt(II); - int StackCount = 0; - // Operands are popped in reverse order, so any operands after FIOperand - // impose a constraint - for (unsigned i = OperandNum; i < II->getNumOperands(); i++) { - if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount; - } - // Walk backwards, tracking stack depth. When it reaches 0 we have reached the - // top of the subtree. 
- while (StackCount) { - if (InsertPt == MBB.begin()) return false; - --InsertPt; - for (const auto &def : InsertPt->defs()) - if (isStackifiedVReg(WFI, def)) --StackCount; - for (const auto &use : InsertPt->explicit_uses()) - if (isStackifiedVReg(WFI, use)) ++StackCount; - } - II = InsertPt; - return true; -} - void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger * /*RS*/) const { @@ -104,56 +59,67 @@ MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); const MachineFrameInfo &MFI = *MF.getFrameInfo(); int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + // If this is the address operand of a load or store, make it relative to SP + // and fold the frame offset directly in. if (MI.mayLoadOrStore() && FIOperandNum == WebAssembly::MemOpAddressOperandNo) { - // If this is the address operand of a load or store, make it relative to SP - // and fold the frame offset directly in. assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0); int64_t Offset = MI.getOperand(1).getImm() + FrameOffset; - if (static_cast(Offset) > std::numeric_limits::max()) { - // If this happens the program is invalid, but better to error here than - // generate broken code. 
- report_fatal_error("Memory offset field overflow"); + if (static_cast(Offset) <= std::numeric_limits::max()) { + MI.getOperand(FIOperandNum - 1).setImm(Offset); + MI.getOperand(FIOperandNum) + .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + return; } - MI.getOperand(FIOperandNum - 1).setImm(Offset); - MI.getOperand(FIOperandNum) - .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); - } else { - // Otherwise calculate the address - auto &MRI = MF.getRegInfo(); - const auto *TII = MF.getSubtarget().getInstrInfo(); - - unsigned FIRegOperand = WebAssembly::SP32; - if (FrameOffset) { - // Create i32.add SP, offset and make it the operand. We want to stackify - // this sequence, but we need to preserve the LIFO expr stack ordering - // (i.e. we can't insert our code in between MI and any operands it - // pops before FIOperand). - auto *WFI = MF.getInfo(); - bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II); - - unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), - OffsetOp) - .addImm(FrameOffset); - if (CanStackifyFI) { - WFI->stackifyVReg(OffsetOp); - FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - WFI->stackifyVReg(FIRegOperand); - } else { - FIRegOperand = OffsetOp; + } + + // If this is an address being added to a constant, fold the frame offset + // into the constant. + if (MI.getOpcode() == WebAssembly::ADD_I32) { + MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); + if (OtherMO.isReg()) { + unsigned OtherMOReg = OtherMO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) { + MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg); + // TODO: For now we just opportunistically do this in the case where + // the CONST_I32 happens to have exactly one def and one use. We + // should generalize this to optimize in more cases. 
+ if (Def && Def->getOpcode() == WebAssembly::CONST_I32 && + MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) { + MachineOperand &ImmMO = Def->getOperand(1); + ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); + MI.getOperand(FIOperandNum) + .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + return; + } } - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), - FIRegOperand) - .addReg(WebAssembly::SP32) - .addReg(OffsetOp); } - MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); } + + // Otherwise create an i32.add SP, offset and make it the operand. + const auto *TII = MF.getSubtarget().getInstrInfo(); + + unsigned FIRegOperand = WebAssembly::SP32; + if (FrameOffset) { + // Create i32.add SP, offset and make it the operand. + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned OffsetOp = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), + OffsetOp) + .addImm(FrameOffset); + FIRegOperand = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), + FIRegOperand) + .addReg(WebAssembly::SP32) + .addReg(OffsetOp); + } + MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); } unsigned Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -0,0 +1,97 @@ +//===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a pass that replaces physical registers with +/// virtual registers. +/// +/// LLVM expects certain physical registers, such as a stack pointer. However, +/// WebAssembly doesn't actually have such physical registers. This pass is run +/// once LLVM no longer needs these registers, and replaces them with virtual +/// registers, so they can participate in register stackifying and coloring in +/// the normal way. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-replace-phys-regs" + +namespace { +class WebAssemblyReplacePhysRegs final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {} + +private: + const char *getPassName() const override { + return "WebAssembly Replace Physical Registers"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyReplacePhysRegs::ID = 0; +FunctionPass *llvm::createWebAssemblyReplacePhysRegs() { + return new WebAssemblyReplacePhysRegs(); +} + +bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Replace Physical Registers **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + 
MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TRI = *MF.getSubtarget().getRegisterInfo(); + bool Changed = false; + + assert(!mustPreserveAnalysisID(LiveIntervalsID) && + "LiveIntervals shouldn't be active yet!"); + // We don't preserve SSA or liveness. + MRI.leaveSSA(); + MRI.invalidateLiveness(); + + for (unsigned PReg = WebAssembly::NoRegister + 1; + PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { + // Skip fake registers that are never used explicitly. + if (PReg == WebAssembly::EXPR_STACK || PReg == WebAssembly::ARGUMENTS) + continue; + + // Replace explicit uses of the physical register with a virtual register. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg); + unsigned VReg = WebAssembly::NoRegister; + for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E; ) { + MachineOperand &MO = *I++; + if (!MO.isImplicit()) { + if (VReg == WebAssembly::NoRegister) + VReg = MRI.createVirtualRegister(RC); + MO.setReg(VReg); + Changed = true; + } + } + } + + return Changed; +} Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -29,6 +29,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -55,6 +56,9 @@ AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -74,57 +78,78 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, unsigned FromReg, unsigned ToReg, const MachineRegisterInfo &MRI, - 
MachineDominatorTree &MDT) { + MachineDominatorTree &MDT, + LiveIntervals &LIS) { bool Changed = false; + + LiveInterval *FromLI = &LIS.getInterval(FromReg); + LiveInterval *ToLI = &LIS.getInterval(ToReg); + + SlotIndex FromIdx = LIS.getInstructionIndex(MI).getRegSlot(); + VNInfo *FromVNI = FromLI->getVNInfoAt(FromIdx); + + SmallVector Indices; + for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); - if (Where->getOpcode() == TargetOpcode::PHI) { - // PHIs use their operands on their incoming CFG edges rather than - // in their parent blocks. Get the basic block paired with this use - // of FromReg and check that MI's block dominates it. - MachineBasicBlock *Pred = - Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); - if (!MDT.dominates(&MBB, Pred)) - continue; - } else { - // For a non-PHI, check that MI dominates the instruction in the - // normal way. - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; - } + + // Check that MI dominates the instruction in the normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + + // If this use gets a different value, skip it. + SlotIndex WhereIdx = LIS.getInstructionIndex(*Where); + VNInfo *WhereVNI = FromLI->getVNInfoAt(WhereIdx); + if (WhereVNI && WhereVNI != FromVNI) + continue; + + // Make sure ToReg isn't clobbered before it gets there. + VNInfo *ToVNI = ToLI->getVNInfoAt(WhereIdx); + if (ToVNI && ToVNI != FromVNI) + continue; + Changed = true; DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from " << MI << "\n"); O.setReg(ToReg); - // If the store's def was previously dead, it is no longer. But the - // dead flag shouldn't be set yet. - assert(!MI.getOperand(0).isDead() && "Unexpected dead flag"); + + // If the store's def was previously dead, it is no longer. 
+ MI.getOperand(0).setIsDead(false); + + Indices.push_back(WhereIdx.getRegSlot()); + } + + if (Changed) { + // Extend ToReg's liveness. + LIS.extendToIndices(*ToLI, Indices); + + // Shrink FromReg's liveness. + LIS.shrinkToUses(FromLI); + + // If we replaced all dominated uses, FromReg is now killed at MI. + if (!FromLI->liveAt(FromIdx.getDeadSlot())) + MI.addRegisterKilled(FromReg, + MBB.getParent()->getSubtarget() + .getRegisterInfo()); } + return Changed; } static bool optimizeStore(MachineBasicBlock &MBB, MachineInstr &MI, const MachineRegisterInfo &MRI, - MachineDominatorTree &MDT) { - const auto &Stored = MI.getOperand(WebAssembly::StoreValueOperandNo); - switch (Stored.getType()) { - case MachineOperand::MO_Register: { - unsigned ToReg = MI.getOperand(0).getReg(); - unsigned FromReg = Stored.getReg(); - return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT); - } - case MachineOperand::MO_FrameIndex: - // TODO: optimize. - return false; - default: - report_fatal_error("Store results: store not consuming reg or frame index"); - } + MachineDominatorTree &MDT, + LiveIntervals &LIS) { + unsigned ToReg = MI.getOperand(0).getReg(); + unsigned FromReg = MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); + return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); } static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI, const MachineRegisterInfo &MRI, MachineDominatorTree &MDT, + LiveIntervals &LIS, const WebAssemblyTargetLowering &TLI, const TargetLibraryInfo &LibInfo) { MachineOperand &Op1 = MI.getOperand(1); @@ -142,23 +167,12 @@ if (!LibInfo.getLibFunc(Name, Func)) return false; - const auto &Op2 = MI.getOperand(2); - switch (Op2.getType()) { - case MachineOperand::MO_Register: { - unsigned FromReg = Op2.getReg(); - unsigned ToReg = MI.getOperand(0).getReg(); - if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) - report_fatal_error("Store results: call to builtin function with wrong " - "signature, from/to mismatch"); - 
return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT); - } - case MachineOperand::MO_FrameIndex: - // TODO: optimize. - return false; - default: + unsigned FromReg = MI.getOperand(2).getReg(); + unsigned ToReg = MI.getOperand(0).getReg(); + if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) report_fatal_error("Store results: call to builtin function with wrong " - "signature, not consuming reg or frame index"); - } + "signature, from/to mismatch"); + return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); } bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { @@ -167,14 +181,18 @@ << "********** Function: " << MF.getName() << '\n'; }); - const MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); MachineDominatorTree &MDT = getAnalysis(); const WebAssemblyTargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); const auto &LibInfo = getAnalysis().getTLI(); + LiveIntervals &LIS = getAnalysis(); bool Changed = false; - assert(MRI.isSSA() && "StoreResults depends on SSA form"); + // We don't preserve SSA form. 
+ MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking"); for (auto &MBB : MF) { DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); @@ -191,11 +209,11 @@ case WebAssembly::STORE_F64: case WebAssembly::STORE_I32: case WebAssembly::STORE_I64: - Changed |= optimizeStore(MBB, MI, MRI, MDT); + Changed |= optimizeStore(MBB, MI, MRI, MDT, LIS); break; case WebAssembly::CALL_I32: case WebAssembly::CALL_I64: - Changed |= optimizeCall(MBB, MI, MRI, MDT, TLI, LibInfo); + Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo); break; } } Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -103,8 +103,6 @@ void addIRPasses() override; bool addInstSelector() override; - bool addILPOpts() override; - void addPreRegAlloc() override; void addPostRegAlloc() override; bool addGCPasses() override { return false; } void addPreEmitPass() override; @@ -162,19 +160,6 @@ return false; } -bool WebAssemblyPassConfig::addILPOpts() { - (void)TargetPassConfig::addILPOpts(); - return true; -} - -void WebAssemblyPassConfig::addPreRegAlloc() { - TargetPassConfig::addPreRegAlloc(); - - // Prepare store instructions for register stackifying. - if (getOptLevel() != CodeGenOpt::None) - addPass(createWebAssemblyStoreResults()); -} - void WebAssemblyPassConfig::addPostRegAlloc() { // TODO: The following CodeGen passes don't currently support code containing // virtual registers. Consider removing their restrictions and re-enabling @@ -194,14 +179,6 @@ disablePass(&LiveDebugValuesID); disablePass(&PatchableFunctionID); - if (getOptLevel() != CodeGenOpt::None) { - // Mark registers as representing wasm's expression stack. 
- addPass(createWebAssemblyRegStackify()); - - // Run the register coloring pass to reduce the total number of registers. - addPass(createWebAssemblyRegColoring()); - } - TargetPassConfig::addPostRegAlloc(); // Run WebAssembly's version of the PrologEpilogInserter. Target-independent @@ -213,6 +190,33 @@ void WebAssemblyPassConfig::addPreEmitPass() { TargetPassConfig::addPreEmitPass(); + // Now that we have a prologue and epilogue and all frame indices are + // rewritten, eliminate SP and FP. This allows them to be stackified, + // colored, and numbered with the rest of the registers. + addPass(createWebAssemblyReplacePhysRegs()); + + if (getOptLevel() != CodeGenOpt::None) { + // LiveIntervals isn't commonly run this late. Re-establish preconditions. + addPass(createWebAssemblyPrepareForLiveIntervals()); + + // Depend on LiveIntervals and perform some optimizations on it. + addPass(createWebAssemblyOptimizeLiveIntervals()); + + // Prepare store instructions for register stackifying. + addPass(createWebAssemblyStoreResults()); + + // Mark registers as representing wasm's expression stack. This is a key + // code-compression technique in WebAssembly. We run this pass (and + // StoreResults above) very late, so that it sees as much code as possible, + // including code emitted by PEI and expanded by late tail duplication. + addPass(createWebAssemblyRegStackify()); + + // Run the register coloring pass to reduce the total number of registers. + // This runs after stackification so that it doesn't consider registers + // that become stackified. + addPass(createWebAssemblyRegColoring()); + } + // Eliminate multiple-entry loops. 
addPass(createWebAssemblyFixIrreducibleControlFlow()); Index: llvm/trunk/test/CodeGen/WebAssembly/byval.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/byval.ll +++ llvm/trunk/test/CodeGen/WebAssembly/byval.ll @@ -23,26 +23,27 @@ ; CHECK-LABEL: byval_arg define void @byval_arg(%SmallStruct* %ptr) { ; CHECK: .param i32 + ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer ; Subtract 16 from SP (SP is 16-byte aligned) ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L2]], $pop[[L3]] ; Ensure SP is stored back before the call - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]] + ; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop[[L4]]), $pop[[L10]]{{$}} + ; CHECK-NEXT: tee_local $push[[L11:.+]]=, $[[SP:.+]]=, $pop[[L12]]{{$}} ; Copy the SmallStruct argument to the stack (SP+12, original SP-4) - ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0) - ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]] + ; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0) + ; CHECK-NEXT: i32.store $discard=, 12($pop[[L11]]), $pop[[L0]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12{{$}} + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, $[[SP]], $pop[[L5]]{{$}} + ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]]{{$}} call void @ext_byval_func(%SmallStruct* byval %ptr) ; Restore the stack ; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[L8:.+]]=, [[SP]], $pop[[L6]] + ; CHECK-NEXT: 
i32.add $push[[L8:.+]]=, $[[SP]], $pop[[L6]] ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), $pop[[L8]] ; CHECK-NEXT: return ret void @@ -53,14 +54,16 @@ ; CHECK: .param i32 ; Don't check the entire SP sequence, just enough to get the alignment. ; CHECK: i32.const $push[[L1:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop{{.+}}), $pop[[L10]]{{$}} + ; CHECK-NEXT: tee_local $push[[L11:.+]]=, $[[SP:.+]]=, $pop[[L12]]{{$}} ; Copy the SmallStruct argument to the stack (SP+8, original SP-8) - ; CHECK: i32.load $push[[L4:.+]]=, 0($0){{$}} - ; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]), $pop[[L4]]{{$}} + ; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0){{$}} + ; CHECK-NEXT: i32.store $discard=, 8($pop[[L11]]), $pop[[L0]]{{$}} ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8 - ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8{{$}} + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, $[[SP]], $pop[[L5]]{{$}} + ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]]{{$}} call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr) ret void } @@ -70,13 +73,15 @@ ; CHECK: .param i32 ; Subtract 16 from SP (SP is 16-byte aligned) ; CHECK: i32.const $push[[L1:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.store $push[[L15:.+]]=, {{.+}}, $pop[[L12]] + ; CHECK-NEXT: tee_local $push[[L14:.+]]=, $[[SP:.+]]=, $pop[[L15]] ; Copy the AlignedStruct argument to the stack (SP+0, original SP-16) ; Just check the last load/store pair of the memcpy ; CHECK: i64.load $push[[L4:.+]]=, 0($0) - ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L4]] + ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L4]] ; Pass a 
pointer to the stack slot to the function - ; CHECK-NEXT: call ext_byval_func_alignedstruct@FUNCTION, [[SP]] + ; CHECK-NEXT: call ext_byval_func_alignedstruct@FUNCTION, $[[SP]] tail call void @ext_byval_func_alignedstruct(%AlignedStruct* byval %ptr) ret void } @@ -108,11 +113,15 @@ ; Call memcpy for "big" byvals. ; CHECK-LABEL: big_byval: +; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 131072 -; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] -; CHECK: i32.call ${{[^,]+}}=, memcpy@FUNCTION, +; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]] +; CHECK-NEXT: i32.store $push[[L12:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}} +; CHECK-NEXT: i32.const $push[[L0:.+]]=, 131072 +; CHECK-NEXT: i32.call $push[[L11:.+]]=, memcpy@FUNCTION, $pop{{.+}}, ${{.+}}, $pop{{.+}} +; CHECK-NEXT: tee_local $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L11]]{{$}} ; CHECK-NEXT: call big_byval_callee@FUNCTION, %big = type [131072 x i8] declare void @big_byval_callee(%big* byval align 1) Index: llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll +++ llvm/trunk/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -104,17 +104,17 @@ ; CHECK-NOT: local ; CHECK: block{{$}} ; CHECK: br_if 0, {{[^,]+}}{{$}} -; CHECK: .LBB2_1: +; CHECK: .LBB2_{{[0-9]+}}: ; CHECK: br_if 0, ${{[0-9]+}}{{$}} -; CHECK: .LBB2_2: +; CHECK: .LBB2_{{[0-9]+}}: ; CHECK: return{{$}} ; OPT-LABEL: test2: ; OPT-NOT: local ; OPT: block{{$}} ; OPT: br_if 0, {{[^,]+}}{{$}} -; OPT: .LBB2_1: +; OPT: .LBB2_{{[0-9]+}}: ; OPT: br_if 0, ${{[0-9]+}}{{$}} -; OPT: .LBB2_2: +; OPT: .LBB2_{{[0-9]+}}: ; OPT: return{{$}} define void @test2(double* nocapture %p, i32 %n) { entry: @@ -393,36 +393,32 @@ ; CHECK: .LBB11_1: ; CHECK: loop{{$}} ; CHECK: block{{$}} -; CHECK: block{{$}} ; 
CHECK: br_if 0, $0{{$}} ; CHECK: br 1{{$}} ; CHECK: .LBB11_3: +; CHECK: end_block{{$}} ; CHECK: block{{$}} ; CHECK: br_if 0, $1{{$}} ; CHECK: br 1{{$}} ; CHECK: .LBB11_5: -; CHECK: .LBB11_6: ; CHECK: br 0{{$}} -; CHECK: .LBB11_7: +; CHECK: .LBB11_6: ; CHECK-NEXT: end_loop{{$}} ; OPT-LABEL: doublediamond_in_a_loop: ; OPT: .LBB11_1: ; OPT: loop{{$}} ; OPT: block{{$}} -; OPT-NEXT: block{{$}} -; OPT-NEXT: block{{$}} ; OPT: br_if 0, {{[^,]+}}{{$}} -; OPT: br_if 1, {{[^,]+}}{{$}} +; OPT: block{{$}} +; OPT: br_if 0, {{[^,]+}}{{$}} ; OPT: br 2{{$}} ; OPT-NEXT: .LBB11_4: ; OPT-NEXT: end_block{{$}} ; OPT: br 1{{$}} ; OPT: .LBB11_5: ; OPT-NEXT: end_block{{$}} -; OPT: .LBB11_6: -; OPT-NEXT: end_block{{$}} ; OPT: br 0{{$}} -; OPT: .LBB11_7: +; OPT: .LBB11_6: ; OPT-NEXT: end_loop{{$}} define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) { entry: @@ -756,33 +752,19 @@ ; CHECK-LABEL: test8: ; CHECK: .LBB17_1: ; CHECK-NEXT: loop{{$}} -; CHECK-NEXT: block{{$}} -; CHECK-NOT: block -; CHECK: br_if 0, {{[^,]+}}{{$}} -; CHECK-NOT: block -; CHECK: br_if 1, {{[^,]+}}{{$}} -; CHECK-NEXT: .LBB17_3: -; CHECK-NEXT: end_block{{$}} -; CHECK-NEXT: loop{{$}} ; CHECK-NEXT: i32.const $push{{[^,]+}}, 0{{$}} ; CHECK-NEXT: br_if 0, {{[^,]+}}{{$}} -; CHECK-NEXT: br 2{{$}} -; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: br 0{{$}} +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: end_loop{{$}} ; OPT-LABEL: test8: ; OPT: .LBB17_1: ; OPT-NEXT: loop{{$}} -; OPT-NEXT: block{{$}} -; OPT-NOT: block -; OPT: br_if 0, {{[^,]+}}{{$}} -; OPT-NOT: block -; OPT: br_if 1, {{[^,]+}}{{$}} -; OPT-NEXT: .LBB17_3: -; OPT-NEXT: end_block{{$}} -; OPT-NEXT: loop{{$}} ; OPT-NEXT: i32.const $push{{[^,]+}}, 0{{$}} ; OPT-NEXT: br_if 0, {{[^,]+}}{{$}} -; OPT-NEXT: br 2{{$}} -; OPT-NEXT: .LBB17_4: +; OPT-NEXT: br 0{{$}} +; OPT-NEXT: .LBB17_2: +; OPT-NEXT: end_loop{{$}} define i32 @test8() { bb: br label %bb1 @@ -1195,10 +1177,9 @@ ; CHECK-NEXT: loop{{$}} ; CHECK-NEXT: i32.const $push0=, 0{{$}} ; CHECK-NEXT: br_if 0, $pop0{{$}} -; CHECK-NEXT: 
.LBB23_2:{{$}} ; CHECK-NEXT: end_loop{{$}} +; CHECK-NEXT: .LBB23_3:{{$}} ; CHECK-NEXT: loop{{$}} -; CHECK-NEXT: i32.const $discard=, 0{{$}} ; CHECK-NEXT: i32.const $push1=, 0{{$}} ; CHECK-NEXT: br_if 0, $pop1{{$}} ; CHECK-NEXT: end_loop{{$}} Index: llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -61,8 +61,8 @@ ; CHECK-LABEL: frame_index: -; CHECK: i32.call $discard=, memset@FUNCTION, $pop12, $pop1, $pop0{{$}} -; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop3, $pop2{{$}} +; CHECK: i32.call $discard=, memset@FUNCTION, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} +; CHECK: i32.call $push{{[0-9]+}}=, memset@FUNCTION, ${{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} ; CHECK: return{{$}} define void @frame_index() { entry: @@ -76,11 +76,13 @@ } ; If the result value of memset doesn't get stackified, it should be marked -; $discard. +; $discard. Note that we use a call to prevent tail dup so that we can test +; this specific functionality. ; CHECK-LABEL: discard_result: ; CHECK: i32.call $discard=, memset@FUNCTION, $0, $1, $2 declare i8* @def() +declare void @block_tail_dup() define i8* @discard_result(i8* %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) { bb: %tmp = icmp eq i32 %arg3, 0 @@ -103,5 +105,36 @@ bb11: %tmp12 = phi i8* [ %arg, %bb7 ], [ %arg, %bb8 ], [ %tmp10, %bb9 ] + call void @block_tail_dup() + ret i8* %tmp12 +} + +; This is the same as discard_result, except we let tail dup happen, so the +; result of the memset *is* stackified. 
+ +; CHECK-LABEL: tail_dup_to_reuse_result: +; CHECK: i32.call $push{{[0-9]+}}=, memset@FUNCTION, $0, $1, $2 +define i8* @tail_dup_to_reuse_result(i8* %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) { +bb: + %tmp = icmp eq i32 %arg3, 0 + br i1 %tmp, label %bb5, label %bb9 + +bb5: + %tmp6 = icmp eq i32 %arg4, 0 + br i1 %tmp6, label %bb7, label %bb8 + +bb7: + call void @llvm.memset.p0i8.i32(i8* %arg, i8 %arg1, i32 %arg2, i32 1, i1 false) + br label %bb11 + +bb8: + br label %bb11 + +bb9: + %tmp10 = call i8* @def() + br label %bb11 + +bb11: + %tmp12 = phi i8* [ %arg, %bb7 ], [ %arg, %bb8 ], [ %tmp10, %bb9 ] ret i8* %tmp12 } Index: llvm/trunk/test/CodeGen/WebAssembly/offset.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/offset.ll +++ llvm/trunk/test/CodeGen/WebAssembly/offset.ll @@ -397,9 +397,9 @@ ; merged into i64 stores. ; CHECK-LABEL: aggregate_return: -; CHECK: i64.const $push0=, 0{{$}} -; CHECK: i64.store $push1=, 8($0):p2align=2, $pop0{{$}} -; CHECK: i64.store $discard=, 0($0):p2align=2, $pop1{{$}} +; CHECK: i64.const $push[[L0:[0-9]+]]=, 0{{$}} +; CHECK: i64.store $push[[L1:[0-9]+]]=, 8($0):p2align=2, $pop[[L0]]{{$}} +; CHECK: i64.store $discard=, 0($0):p2align=2, $pop[[L1]]{{$}} define {i32,i32,i32,i32} @aggregate_return() { ret {i32,i32,i32,i32} zeroinitializer } @@ -408,12 +408,12 @@ ; merged. 
; CHECK-LABEL: aggregate_return_without_merge: -; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.store8 $push1=, 14($0), $pop0{{$}} -; CHECK: i32.store16 $push2=, 12($0), $pop1{{$}} -; CHECK: i32.store $discard=, 8($0), $pop2{{$}} -; CHECK: i64.const $push3=, 0{{$}} -; CHECK: i64.store $discard=, 0($0), $pop3{{$}} +; CHECK: i32.const $push[[L0:[0-9]+]]=, 0{{$}} +; CHECK: i32.store8 $push[[L1:[0-9]+]]=, 14($0), $pop[[L0]]{{$}} +; CHECK: i32.store16 $push[[L2:[0-9]+]]=, 12($0), $pop[[L1]]{{$}} +; CHECK: i32.store $discard=, 8($0), $pop[[L2]]{{$}} +; CHECK: i64.const $push[[L3:[0-9]+]]=, 0{{$}} +; CHECK: i64.store $discard=, 0($0), $pop[[L3]]{{$}} define {i64,i32,i16,i8} @aggregate_return_without_merge() { ret {i64,i32,i16,i8} zeroinitializer } Index: llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll +++ llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll @@ -194,9 +194,9 @@ ; CHECK-LABEL: simple_multiple_use: ; CHECK: .param i32, i32{{$}} ; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}} -; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $0=, $pop[[NUM0]]{{$}} +; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}} ; CHECK-NEXT: call use_a@FUNCTION, $pop[[NUM1]]{{$}} -; CHECK-NEXT: call use_b@FUNCTION, $0{{$}} +; CHECK-NEXT: call use_b@FUNCTION, $[[NUM2]]{{$}} ; CHECK-NEXT: return{{$}} declare void @use_a(i32) declare void @use_b(i32) @@ -212,8 +212,8 @@ ; CHECK-LABEL: multiple_uses_in_same_insn: ; CHECK: .param i32, i32{{$}} ; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}} -; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $0=, $pop[[NUM0]]{{$}} -; CHECK-NEXT: call use_2@FUNCTION, $pop[[NUM1]], $0{{$}} +; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}} +; CHECK-NEXT: call use_2@FUNCTION, $pop[[NUM1]], $[[NUM2]]{{$}} ; CHECK-NEXT: return{{$}} declare void @use_2(i32, i32) 
define void @multiple_uses_in_same_insn(i32 %x, i32 %y) { @@ -273,7 +273,6 @@ ; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}} ; CHECK-NEXT: f64.select $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}} ; CHECK: $[[NUM2]]=, -; CHECK: $[[NUM2]]=, define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) { bb: br label %bb5 @@ -325,9 +324,9 @@ ; Don't move stores past loads if there may be aliasing ; CHECK-LABEL: no_stackify_store_past_load -; CHECK: i32.store {{.*}}, 0($1), $0 +; CHECK: i32.store $[[L0:[0-9]+]]=, 0($1), $0 ; CHECK: i32.load {{.*}}, 0($2) -; CHECK: i32.call {{.*}}, callee@FUNCTION, $0 +; CHECK: i32.call {{.*}}, callee@FUNCTION, $[[L0]]{{$}} define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) { store i32 %a, i32* %p1 %b = load i32, i32* %p2, align 4 Index: llvm/trunk/test/CodeGen/WebAssembly/store-results.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/store-results.ll +++ llvm/trunk/test/CodeGen/WebAssembly/store-results.ll @@ -61,7 +61,8 @@ } ; CHECK-LABEL: fi_ret: -; CHECK: i32.store $discard=, +; CHECK: i32.store $push0=, +; CHECK: return $pop0{{$}} define hidden i8* @fi_ret(i8** %addr) { entry: %buf = alloca [27 x i8], align 16 Index: llvm/trunk/test/CodeGen/WebAssembly/userstack.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/userstack.ll +++ llvm/trunk/test/CodeGen/WebAssembly/userstack.ll @@ -12,19 +12,20 @@ ; Check that there is an extra local for the stack pointer. 
; CHECK: .local i32{{$}} define void @alloca32() noredzone { + ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer{{$}} ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] + ; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 0($pop[[L4]]), $pop[[L8]]{{$}} + ; CHECK-NEXT: tee_local $push[[L9:.+]]=, $[[SP:.+]]=, $pop[[L10]]{{$}} %retval = alloca i32 ; CHECK: i32.const $push[[L0:.+]]=, 0 - ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]] + ; CHECK: i32.store {{.*}}=, 12($pop[[L9]]), $pop[[L0]] store i32 0, i32* %retval ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]] ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -32,17 +33,18 @@ ; CHECK-LABEL: alloca3264: ; CHECK: .local i32{{$}} define void @alloca3264() { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK: i32.const $push[[L2:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L2]]) + ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16 + ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]] + ; CHECK-NEXT: tee_local $push[[L5:.+]]=, $[[SP:.+]]=, $pop[[L6]] %r1 = alloca i32 %r2 = alloca double - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 0 - ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]] + ; CHECK-NEXT: i32.const $push[[L0:.+]]=, 0 + ; CHECK-NEXT: i32.store $discard=, 12($pop[[L5]]), 
$pop[[L0]] store i32 0, i32* %r1 - ; CHECK-NEXT: i64.const $push[[L0:.+]]=, 0 - ; CHECK-NEXT: i64.store {{.*}}=, 0([[SP]]), $pop[[L0]] + ; CHECK-NEXT: i64.const $push[[L1:.+]]=, 0 + ; CHECK-NEXT: i64.store $discard=, 0($[[SP]]), $pop[[L1]] store double 0.0, double* %r2 ; CHECK-NEXT: return ret void @@ -51,52 +53,52 @@ ; CHECK-LABEL: allocarray: ; CHECK: .local i32{{$}} define void @allocarray() { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 144{{$}} - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] + ; CHECK: i32.const $push[[L7:.+]]=, __stack_pointer + ; CHECK: i32.const $push[[L4:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L5:.+]]=, 0($pop[[L4]]) + ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 144{{$}} + ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L5]], $pop[[L6]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L7]]), $pop[[L11]] %r = alloca [33 x i32] - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]] - ; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}} - ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}} + ; CHECK-NEXT: i32.const $push[[L2:.+]]=, 24 + ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $[[SP]], $pop[[L2]] + ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}} + ; CHECK-NEXT: i32.store $push[[L0:.+]]=, 0($pop[[L3]]), $pop[[L1]]{{$}} + ; CHECK-NEXT: i32.store $discard=, 12($[[SP]]), $pop[[L0]]{{$}} %p = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 0 store i32 1, i32* %p %p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 - ; CHECK: i32.const 
$push[[L12:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144 - ; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]] - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]] + ; CHECK: i32.const $push[[L10:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L8:.+]]=, 144 + ; CHECK-NEXT: i32.add $push[[L19:.+]]=, $[[SP]], $pop[[L8]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L10]]), $pop[[L9]] ret void } ; CHECK-LABEL: non_mem_use define void @non_mem_use(i8** %addr) { ; CHECK: i32.const $push[[L1:.+]]=, 48 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, {{.+}}, $pop[[L11]] %buf = alloca [27 x i8], align 16 %r = alloca i64 %r2 = alloca i64 ; %r is at SP+8 + ; CHECK: tee_local $push[[L12:.+]]=, $[[SP:.+]]=, $pop{{.+}} ; CHECK: i32.const $push[[OFF:.+]]=, 8 - ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $pop[[L12]], $pop[[OFF]] ; CHECK-NEXT: call ext_func@FUNCTION, $pop[[ARG1]] call void @ext_func(i64* %r) ; %r2 is at SP+0, no add needed - ; CHECK-NEXT: call ext_func@FUNCTION, [[SP]] + ; CHECK-NEXT: call ext_func@FUNCTION, $[[SP]] call void @ext_func(i64* %r2) ; Use as a value, but in a store ; %buf is at SP+16 ; CHECK: i32.const $push[[OFF:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $[[SP]], $pop[[OFF]] ; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]] %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0 store i8* %gep, i8** %addr @@ -106,23 +108,25 @@ ; CHECK-LABEL: allocarray_inbounds: ; CHECK: .local i32{{$}} define void @allocarray_inbounds() { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 32{{$}} - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK: 
i32.const $push[[L6:.+]]=, __stack_pointer + ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($pop[[L3]]) + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32{{$}} + ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L4]], $pop[[L5]] + ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L6]]), $pop[[L10]]{{$}} %r = alloca [5 x i32] ; CHECK: i32.const $push[[L3:.+]]=, 1 - ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]] + ; CHECK: i32.store {{.*}}=, 12($[[SP]]), $pop[[L3]] %p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0 store i32 1, i32* %p ; This store should have both the GEP and the FI folded into it. - ; CHECK-NEXT: i32.store {{.*}}=, 24([[SP]]), $pop + ; CHECK-NEXT: i32.store {{.*}}=, 24($[[SP]]), $pop %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 call void @ext_func(i64* null); ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]] ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -130,14 +134,13 @@ ; CHECK-LABEL: dynamic_alloca: define void @dynamic_alloca(i32 %alloc) { ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] + ; CHECK-NEXT: i32.load $push[[L13:.+]]=, 0($pop[[L1]]) + ; CHECK-NEXT: tee_local $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}} + ; CHECK-NEXT: copy_local [[FP:.+]]=, $pop[[L12]]{{$}} ; Target independent codegen bumps the stack pointer. 
; CHECK: i32.sub - ; CHECK-NEXT: copy_local [[SP]]=, ; Check that SP is written back to memory after decrement - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] + ; CHECK: i32.store $discard=, 0($pop{{.+}}), %r = alloca i32, i32 %alloc ; Target-independent codegen also calculates the store addr ; CHECK: call ext_func_i32@FUNCTION @@ -149,16 +152,17 @@ ; CHECK-LABEL: dynamic_alloca_redzone: define void @dynamic_alloca_redzone(i32 %alloc) { - ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] + ; CHECK: i32.const $push[[L8:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L13:.+]]=, 0($pop[[L1]]) + ; CHECK-NEXT: tee_local $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}} + ; CHECK-NEXT: copy_local [[FP:.+]]=, $pop[[L12]]{{$}} ; Target independent codegen bumps the stack pointer - ; CHECK: i32.sub [[R:.+]]=, - ; CHECK-NEXT: copy_local [[SP]]=, [[R]] + ; CHECK: i32.sub %r = alloca i32, i32 %alloc - ; check-next here asserts that SP is not written back. - ; CHECK-NEXT: i32.const $push[[ZERO:.+]]=, 0 - ; CHECK-NEXT: i32.store $discard=, 0([[R]]), $pop[[ZERO]] + ; CHECK-NEXT: tee_local $push[[L8:.+]]=, $0=, $pop + ; CHECK-NEXT: copy_local $discard=, $pop[[L8]]{{$}} + ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}} + ; CHECK-NEXT: i32.store $discard=, 0($0), $pop[[L6]]{{$}} store i32 0, i32* %r ; CHECK-NEXT: return ret void @@ -167,26 +171,20 @@ ; CHECK-LABEL: dynamic_static_alloca: define void @dynamic_static_alloca(i32 %alloc) noredzone { ; Decrement SP in the prolog by the static amount and writeback to memory. 
- ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]] - ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]] + ; CHECK: i32.const $push[[L9:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.load $push[[L10:.+]]=, 0($pop[[L9]]) + ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 16 + ; CHECK-NEXT: i32.sub $push[[L20:.+]]=, $pop[[L10]], $pop[[L11]] + ; CHECK-NEXT: tee_local $push[[L19:.+]]=, $[[FP:.+]]=, $pop[[L20]] + ; CHECK: i32.store $push[[L0:.+]]=, 0($pop{{.+}}), $[[FP]] ; Decrement SP in the body by the dynamic amount. ; CHECK: i32.sub - ; CHECK: copy_local [[SP]]=, ; Writeback to memory. - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]] + ; CHECK: i32.store $discard=, 0($pop{{.+}}), $pop{{.+}} %r1 = alloca i32 %r = alloca i32, i32 %alloc store i32 0, i32* %r - ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]] - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] + ; CHEC: i32.store $discard=, 0($pop{{.+}}), $pop{{.+}} ret void } @@ -196,10 +194,10 @@ define void @copytoreg_fi(i1 %cond, i32* %b) { entry: ; CHECK: i32.const $push[[L1:.+]]=, 16 - ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK-NEXT: i32.sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]] %addr = alloca i32 ; CHECK: i32.const $push[[OFF:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]] ; CHECK-NEXT: copy_local [[COPY:.+]]=, $pop[[ADDR]] br label %body body: @@ -216,12 +214,13 @@ ; Test __builtin_frame_address(0). 
; CHECK-LABEL: frameaddress_0: -; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer -; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]]) -; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]] -; CHECK-NEXT: call use_i8_star@FUNCTION, [[FP]] -; CHEC K-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer -; CHEC K-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[FP]] +; CHECK: i32.const $push[[L0:.+]]=, __stack_pointer +; CHECK-NEXT: i32.load $push[[L3:.+]]=, 0($pop[[L0]]) +; CHECK-NEXT: copy_local $push[[L4:.+]]=, $pop[[L3]]{{$}} +; CHECK-NEXT: tee_local $push[[L2:.+]]=, $[[FP:.+]]=, $pop[[L4]]{{$}} +; CHECK-NEXT: call use_i8_star@FUNCTION, $pop[[L2]] +; CHECK-NEXT: i32.const $push[[L1:.+]]=, __stack_pointer +; CHECK-NEXT: i32.store $discard=, 0($pop[[L1]]), $[[FP]] define void @frameaddress_0() { %t = call i8* @llvm.frameaddress(i32 0) call void @use_i8_star(i8* %t)