Index: llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt +++ llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt @@ -47,7 +47,7 @@ WebAssemblyRuntimeLibcallSignatures.cpp WebAssemblySelectionDAGInfo.cpp WebAssemblySetP2AlignOperands.cpp - WebAssemblyStoreResults.cpp + WebAssemblyMemIntrinsicResults.cpp WebAssemblySubtarget.cpp WebAssemblyTargetMachine.cpp WebAssemblyTargetObjectFile.cpp Index: llvm/trunk/lib/Target/WebAssembly/README.txt =================================================================== --- llvm/trunk/lib/Target/WebAssembly/README.txt +++ llvm/trunk/lib/Target/WebAssembly/README.txt @@ -94,10 +94,10 @@ //===---------------------------------------------------------------------===// Instead of the OptimizeReturned pass, which should consider preserving the -"returned" attribute through to MachineInstrs and extending the StoreResults -pass to do this optimization on calls too. That would also let the -WebAssemblyPeephole pass clean up dead defs for such calls, as it does for -stores. +"returned" attribute through to MachineInstrs and extending the +MemIntrinsicResults pass to do this optimization on calls too. That would also +let the WebAssemblyPeephole pass clean up dead defs for such calls, as it does +for stores. 
//===---------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/WebAssembly/WebAssembly.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssembly.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssembly.h @@ -43,7 +43,7 @@ FunctionPass *createWebAssemblyReplacePhysRegs(); FunctionPass *createWebAssemblyPrepareForLiveIntervals(); FunctionPass *createWebAssemblyOptimizeLiveIntervals(); -FunctionPass *createWebAssemblyStoreResults(); +FunctionPass *createWebAssemblyMemIntrinsicResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); FunctionPass *createWebAssemblyExplicitLocals(); @@ -68,7 +68,7 @@ void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &); void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &); void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &); -void initializeWebAssemblyStoreResultsPass(PassRegistry &); +void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &); void initializeWebAssemblyRegStackifyPass(PassRegistry &); void initializeWebAssemblyRegColoringPass(PassRegistry &); void initializeWebAssemblyExplicitLocalsPass(PassRegistry &); Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp @@ -0,0 +1,212 @@ +//== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements an optimization pass using memory intrinsic results. 
+/// +/// Calls to memory intrinsics (memcpy, memmove, memset) return the destination +/// address. They are in the form of +/// %dst_new = call @memcpy %dst, %src, %len +/// where %dst and %dst_new registers contain the same value. +/// +/// This is to enable an optimization wherein uses of the %dst register used in +/// the parameter can be replaced by uses of the %dst_new register used in the +/// result, making the %dst register more likely to be single-use, thus more +/// likely to be useful to register stackifying, and potentially also exposing +/// the call instruction itself to register stackifying. These both can reduce +/// local.get/local.set traffic. +/// +/// The LLVM intrinsics for these return void so they can't use the returned +/// attribute and consequently aren't handled by the OptimizeReturned pass. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-mem-intrinsic-results" + +namespace { +class WebAssemblyMemIntrinsicResults final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyMemIntrinsicResults() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "WebAssembly Memory Intrinsic Results"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + 
AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: +}; +} // end anonymous namespace + +char WebAssemblyMemIntrinsicResults::ID = 0; +INITIALIZE_PASS(WebAssemblyMemIntrinsicResults, DEBUG_TYPE, + "Optimize memory intrinsic result values for WebAssembly", + false, false) + +FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() { + return new WebAssemblyMemIntrinsicResults(); +} + +// Replace uses of FromReg with ToReg if they are dominated by MI. +static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, + unsigned FromReg, unsigned ToReg, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, + LiveIntervals &LIS) { + bool Changed = false; + + LiveInterval *FromLI = &LIS.getInterval(FromReg); + LiveInterval *ToLI = &LIS.getInterval(ToReg); + + SlotIndex FromIdx = LIS.getInstructionIndex(MI).getRegSlot(); + VNInfo *FromVNI = FromLI->getVNInfoAt(FromIdx); + + SmallVector Indices; + + for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); + I != E;) { + MachineOperand &O = *I++; + MachineInstr *Where = O.getParent(); + + // Check that MI dominates the instruction in the normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + + // If this use gets a different value, skip it. + SlotIndex WhereIdx = LIS.getInstructionIndex(*Where); + VNInfo *WhereVNI = FromLI->getVNInfoAt(WhereIdx); + if (WhereVNI && WhereVNI != FromVNI) + continue; + + // Make sure ToReg isn't clobbered before it gets there. + VNInfo *ToVNI = ToLI->getVNInfoAt(WhereIdx); + if (ToVNI && ToVNI != FromVNI) + continue; + + Changed = true; + LLVM_DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from " + << MI << "\n"); + O.setReg(ToReg); + + // If the call's def was previously dead, it is no longer. 
+ if (!O.isUndef()) { + MI.getOperand(0).setIsDead(false); + + Indices.push_back(WhereIdx.getRegSlot()); + } + } + + if (Changed) { + // Extend ToReg's liveness. + LIS.extendToIndices(*ToLI, Indices); + + // Shrink FromReg's liveness. + LIS.shrinkToUses(FromLI); + + // If we replaced all dominated uses, FromReg is now killed at MI. + if (!FromLI->liveAt(FromIdx.getDeadSlot())) + MI.addRegisterKilled(FromReg, MBB.getParent() + ->getSubtarget() + .getRegisterInfo()); + } + + return Changed; +} + +static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, LiveIntervals &LIS, + const WebAssemblyTargetLowering &TLI, + const TargetLibraryInfo &LibInfo) { + MachineOperand &Op1 = MI.getOperand(1); + if (!Op1.isSymbol()) + return false; + + StringRef Name(Op1.getSymbolName()); + bool callReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) || + Name == TLI.getLibcallName(RTLIB::MEMMOVE) || + Name == TLI.getLibcallName(RTLIB::MEMSET); + if (!callReturnsInput) + return false; + + LibFunc Func; + if (!LibInfo.getLibFunc(Name, Func)) + return false; + + unsigned FromReg = MI.getOperand(2).getReg(); + unsigned ToReg = MI.getOperand(0).getReg(); + if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) + report_fatal_error("Memory Intrinsic results: call to builtin function " + "with wrong signature, from/to mismatch"); + return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); +} + +bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Memory Intrinsic Results **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineDominatorTree &MDT = getAnalysis(); + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget().getTargetLowering(); + const auto &LibInfo = getAnalysis().getTLI(); + LiveIntervals &LIS = getAnalysis(); + bool Changed = false; + + // We don't 
preserve SSA form. + MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && + "MemIntrinsicResults expects liveness tracking"); + + for (auto &MBB : MF) { + LLVM_DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); + for (auto &MI : MBB) + switch (MI.getOpcode()) { + default: + break; + case WebAssembly::CALL_I32: + case WebAssembly::CALL_I64: + Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo); + break; + } + } + + return Changed; +} Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -1,205 +0,0 @@ -//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements an optimization pass using store result values. -/// -/// WebAssembly's store instructions return the stored value. This is to enable -/// an optimization wherein uses of the stored value can be replaced by uses of -/// the store's result value, making the stored value register more likely to -/// be single-use, thus more likely to be useful to register stackifying, and -/// potentially also exposing the store to register stackifying. These both can -/// reduce local.get/local.set traffic. -/// -/// This pass also performs this optimization for memcpy, memmove, and memset -/// calls, since the LLVM intrinsics for these return void so they can't use the -/// returned attribute and consequently aren't handled by the OptimizeReturned -/// pass. 
-/// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" -#include "WebAssembly.h" -#include "WebAssemblyMachineFunctionInfo.h" -#include "WebAssemblySubtarget.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "wasm-store-results" - -namespace { -class WebAssemblyStoreResults final : public MachineFunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - WebAssemblyStoreResults() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "WebAssembly Store Results"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - -private: -}; -} // end anonymous namespace - -char WebAssemblyStoreResults::ID = 0; -INITIALIZE_PASS(WebAssemblyStoreResults, DEBUG_TYPE, - "Optimize store result values for WebAssembly", false, false) - -FunctionPass *llvm::createWebAssemblyStoreResults() { - return new WebAssemblyStoreResults(); -} - -// Replace uses of FromReg with ToReg if they are dominated by MI. 
-static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, - unsigned FromReg, unsigned ToReg, - const MachineRegisterInfo &MRI, - MachineDominatorTree &MDT, - LiveIntervals &LIS) { - bool Changed = false; - - LiveInterval *FromLI = &LIS.getInterval(FromReg); - LiveInterval *ToLI = &LIS.getInterval(ToReg); - - SlotIndex FromIdx = LIS.getInstructionIndex(MI).getRegSlot(); - VNInfo *FromVNI = FromLI->getVNInfoAt(FromIdx); - - SmallVector Indices; - - for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); - I != E;) { - MachineOperand &O = *I++; - MachineInstr *Where = O.getParent(); - - // Check that MI dominates the instruction in the normal way. - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; - - // If this use gets a different value, skip it. - SlotIndex WhereIdx = LIS.getInstructionIndex(*Where); - VNInfo *WhereVNI = FromLI->getVNInfoAt(WhereIdx); - if (WhereVNI && WhereVNI != FromVNI) - continue; - - // Make sure ToReg isn't clobbered before it gets there. - VNInfo *ToVNI = ToLI->getVNInfoAt(WhereIdx); - if (ToVNI && ToVNI != FromVNI) - continue; - - Changed = true; - LLVM_DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from " - << MI << "\n"); - O.setReg(ToReg); - - // If the store's def was previously dead, it is no longer. - if (!O.isUndef()) { - MI.getOperand(0).setIsDead(false); - - Indices.push_back(WhereIdx.getRegSlot()); - } - } - - if (Changed) { - // Extend ToReg's liveness. - LIS.extendToIndices(*ToLI, Indices); - - // Shrink FromReg's liveness. - LIS.shrinkToUses(FromLI); - - // If we replaced all dominated uses, FromReg is now killed at MI. 
- if (!FromLI->liveAt(FromIdx.getDeadSlot())) - MI.addRegisterKilled(FromReg, MBB.getParent() - ->getSubtarget() - .getRegisterInfo()); - } - - return Changed; -} - -static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI, - const MachineRegisterInfo &MRI, - MachineDominatorTree &MDT, LiveIntervals &LIS, - const WebAssemblyTargetLowering &TLI, - const TargetLibraryInfo &LibInfo) { - MachineOperand &Op1 = MI.getOperand(1); - if (!Op1.isSymbol()) - return false; - - StringRef Name(Op1.getSymbolName()); - bool callReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) || - Name == TLI.getLibcallName(RTLIB::MEMMOVE) || - Name == TLI.getLibcallName(RTLIB::MEMSET); - if (!callReturnsInput) - return false; - - LibFunc Func; - if (!LibInfo.getLibFunc(Name, Func)) - return false; - - unsigned FromReg = MI.getOperand(2).getReg(); - unsigned ToReg = MI.getOperand(0).getReg(); - if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) - report_fatal_error("Store results: call to builtin function with wrong " - "signature, from/to mismatch"); - return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); -} - -bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { - LLVM_DEBUG({ - dbgs() << "********** Store Results **********\n" - << "********** Function: " << MF.getName() << '\n'; - }); - - MachineRegisterInfo &MRI = MF.getRegInfo(); - MachineDominatorTree &MDT = getAnalysis(); - const WebAssemblyTargetLowering &TLI = - *MF.getSubtarget().getTargetLowering(); - const auto &LibInfo = getAnalysis().getTLI(); - LiveIntervals &LIS = getAnalysis(); - bool Changed = false; - - // We don't preserve SSA form. 
- MRI.leaveSSA(); - - assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking"); - - for (auto &MBB : MF) { - LLVM_DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); - for (auto &MI : MBB) - switch (MI.getOpcode()) { - default: - break; - case WebAssembly::CALL_I32: - case WebAssembly::CALL_I64: - Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo); - break; - } - } - - return Changed; -} Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -62,7 +62,7 @@ initializeWebAssemblyReplacePhysRegsPass(PR); initializeWebAssemblyPrepareForLiveIntervalsPass(PR); initializeWebAssemblyOptimizeLiveIntervalsPass(PR); - initializeWebAssemblyStoreResultsPass(PR); + initializeWebAssemblyMemIntrinsicResultsPass(PR); initializeWebAssemblyRegStackifyPass(PR); initializeWebAssemblyRegColoringPass(PR); initializeWebAssemblyExplicitLocalsPass(PR); @@ -311,13 +311,14 @@ // Depend on LiveIntervals and perform some optimizations on it. addPass(createWebAssemblyOptimizeLiveIntervals()); - // Prepare store instructions for register stackifying. - addPass(createWebAssemblyStoreResults()); + // Prepare memory intrinsic calls for register stackifying. + addPass(createWebAssemblyMemIntrinsicResults()); // Mark registers as representing wasm's value stack. This is a key // code-compression technique in WebAssembly. We run this pass (and - // StoreResults above) very late, so that it sees as much code as possible, - // including code emitted by PEI and expanded by late tail duplication. + // MemIntrinsicResults above) very late, so that it sees as much code as + // possible, including code emitted by PEI and expanded by late tail + // duplication. 
addPass(createWebAssemblyRegStackify()); // Run the register coloring pass to reduce the total number of registers. Index: llvm/trunk/test/DebugInfo/WebAssembly/dbg-value-live-interval.ll =================================================================== --- llvm/trunk/test/DebugInfo/WebAssembly/dbg-value-live-interval.ll +++ llvm/trunk/test/DebugInfo/WebAssembly/dbg-value-live-interval.ll @@ -4,7 +4,7 @@ ; CHECK: bb.3.for.body.for.body_crit_edge: ; CHECK: [[REG:%[0-9]+]]:i32 = nsw ADD_I32 {{.*}} fib.c:7:7 ; CHECK: DBG_VALUE [[REG]]:i32, $noreg, !"a", {{.*}} fib.c:5:13 -; CHECK: After WebAssembly Store Results: +; CHECK: After WebAssembly Memory Intrinsic Results: ; ModuleID = 'fib.bc' source_filename = "fib.c"