Index: lib/Target/WebAssembly/CMakeLists.txt =================================================================== --- lib/Target/WebAssembly/CMakeLists.txt +++ lib/Target/WebAssembly/CMakeLists.txt @@ -47,7 +47,7 @@ WebAssemblyRuntimeLibcallSignatures.cpp WebAssemblySelectionDAGInfo.cpp WebAssemblySetP2AlignOperands.cpp - WebAssemblyStoreResults.cpp + WebAssemblyMemIntrinsicResults.cpp WebAssemblySubtarget.cpp WebAssemblyTargetMachine.cpp WebAssemblyTargetObjectFile.cpp Index: lib/Target/WebAssembly/README.txt =================================================================== --- lib/Target/WebAssembly/README.txt +++ lib/Target/WebAssembly/README.txt @@ -94,7 +94,7 @@ //===---------------------------------------------------------------------===// Instead of the OptimizeReturned pass, which should consider preserving the -"returned" attribute through to MachineInstrs and extending the StoreResults +"returned" attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too. That would also let the WebAssemblyPeephole pass clean up dead defs for such calls, as it does for stores. 
Index: lib/Target/WebAssembly/WebAssembly.h =================================================================== --- lib/Target/WebAssembly/WebAssembly.h +++ lib/Target/WebAssembly/WebAssembly.h @@ -43,7 +43,7 @@ FunctionPass *createWebAssemblyReplacePhysRegs(); FunctionPass *createWebAssemblyPrepareForLiveIntervals(); FunctionPass *createWebAssemblyOptimizeLiveIntervals(); -FunctionPass *createWebAssemblyStoreResults(); +FunctionPass *createWebAssemblyMemIntrinsicResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); FunctionPass *createWebAssemblyExplicitLocals(); @@ -68,7 +68,7 @@ void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &); void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &); void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &); -void initializeWebAssemblyStoreResultsPass(PassRegistry &); +void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &); void initializeWebAssemblyRegStackifyPass(PassRegistry &); void initializeWebAssemblyRegColoringPass(PassRegistry &); void initializeWebAssemblyExplicitLocalsPass(PassRegistry &); Index: lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp +++ lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp @@ -1,4 +1,4 @@ -//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===// +//== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==// // // The LLVM Compiler Infrastructure // @@ -8,19 +8,22 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements an optimization pass using store result values. +/// This file implements an optimization pass using memory intrinsic results. /// -/// WebAssembly's store instructions return the stored value. 
This is to enable -/// an optimization wherein uses of the stored value can be replaced by uses of -/// the store's result value, making the stored value register more likely to -/// be single-use, thus more likely to be useful to register stackifying, and -/// potentially also exposing the store to register stackifying. These both can -/// reduce get_local/set_local traffic. +/// Calls to memory intrinsics (memcpy, memmove, memset) return the destination +/// address. They are in the form of +/// %dst_new = call @memcpy %dst, %src, %len +/// where %dst and %dst_new registers contain the same value. /// -/// This pass also performs this optimization for memcpy, memmove, and memset -/// calls, since the LLVM intrinsics for these return void so they can't use the -/// returned attribute and consequently aren't handled by the OptimizeReturned -/// pass. +/// This pass exploits that property: uses of the %dst register used in +/// the parameter can be replaced by uses of the %dst_new register used in the +/// result, making the %dst register more likely to be single-use, thus more +/// likely to be useful to register stackifying, and potentially also exposing +/// the call instruction itself to register stackifying. These both can reduce +/// get_local/set_local traffic. +/// +/// The LLVM intrinsics for these return void so they can't use the returned +/// attribute and consequently aren't handled by the OptimizeReturned pass. 
/// //===----------------------------------------------------------------------===// @@ -38,15 +41,17 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "wasm-store-results" +#define DEBUG_TYPE "wasm-mem-intrinsic-results" namespace { -class WebAssemblyStoreResults final : public MachineFunctionPass { +class WebAssemblyMemIntrinsicResults final : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid - WebAssemblyStoreResults() : MachineFunctionPass(ID) {} + WebAssemblyMemIntrinsicResults() : MachineFunctionPass(ID) {} - StringRef getPassName() const override { return "WebAssembly Store Results"; } + StringRef getPassName() const override { + return "WebAssembly Memory Intrinsic Results"; + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -67,12 +72,13 @@ }; } // end anonymous namespace -char WebAssemblyStoreResults::ID = 0; -INITIALIZE_PASS(WebAssemblyStoreResults, DEBUG_TYPE, - "Optimize store result values for WebAssembly", false, false) +char WebAssemblyMemIntrinsicResults::ID = 0; +INITIALIZE_PASS(WebAssemblyMemIntrinsicResults, DEBUG_TYPE, + "Optimize memory intrinsic result values for WebAssembly", + false, false) -FunctionPass *llvm::createWebAssemblyStoreResults() { - return new WebAssemblyStoreResults(); +FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() { + return new WebAssemblyMemIntrinsicResults(); } // Replace uses of FromReg with ToReg if they are dominated by MI. 
@@ -164,14 +170,14 @@ unsigned FromReg = MI.getOperand(2).getReg(); unsigned ToReg = MI.getOperand(0).getReg(); if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) - report_fatal_error("Store results: call to builtin function with wrong " - "signature, from/to mismatch"); + report_fatal_error("Memory Intrinsic results: call to builtin function " + "with wrong signature, from/to mismatch"); return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); } -bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { +bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG({ - dbgs() << "********** Store Results **********\n" + dbgs() << "********** Memory Intrinsic Results **********\n" << "********** Function: " << MF.getName() << '\n'; }); @@ -186,7 +192,8 @@ // We don't preserve SSA form. MRI.leaveSSA(); - assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking"); + assert(MRI.tracksLiveness() && + "MemIntrinsicResults expects liveness tracking"); for (auto &MBB : MF) { LLVM_DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); Index: lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -62,7 +62,7 @@ initializeWebAssemblyReplacePhysRegsPass(PR); initializeWebAssemblyPrepareForLiveIntervalsPass(PR); initializeWebAssemblyOptimizeLiveIntervalsPass(PR); - initializeWebAssemblyStoreResultsPass(PR); + initializeWebAssemblyMemIntrinsicResultsPass(PR); initializeWebAssemblyRegStackifyPass(PR); initializeWebAssemblyRegColoringPass(PR); initializeWebAssemblyExplicitLocalsPass(PR); @@ -305,13 +305,14 @@ // Depend on LiveIntervals and perform some optimizations on it. addPass(createWebAssemblyOptimizeLiveIntervals()); - // Prepare store instructions for register stackifying. 
- addPass(createWebAssemblyStoreResults()); + // Prepare memory intrinsic calls for register stackifying. + addPass(createWebAssemblyMemIntrinsicResults()); // Mark registers as representing wasm's value stack. This is a key // code-compression technique in WebAssembly. We run this pass (and - // StoreResults above) very late, so that it sees as much code as possible, - // including code emitted by PEI and expanded by late tail duplication. + // MemIntrinsicResults above) very late, so that it sees as much code as + // possible, including code emitted by PEI and expanded by late tail + // duplication. addPass(createWebAssemblyRegStackify()); // Run the register coloring pass to reduce the total number of registers. Index: test/DebugInfo/WebAssembly/dbg-value-live-interval.ll =================================================================== --- test/DebugInfo/WebAssembly/dbg-value-live-interval.ll +++ test/DebugInfo/WebAssembly/dbg-value-live-interval.ll @@ -4,7 +4,7 @@ ; CHECK: bb.3.for.body.for.body_crit_edge: ; CHECK: [[REG:%[0-9]+]]:i32 = nsw ADD_I32 {{.*}} fib.c:7:7 ; CHECK: DBG_VALUE [[REG]]:i32, $noreg, !"a", {{.*}} fib.c:5:13 -; CHECK: After WebAssembly Store Results: +; CHECK: After WebAssembly Memory Intrinsic Results: ; ModuleID = 'fib.bc' source_filename = "fib.c"