Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -81,6 +81,7 @@ // more precise. static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, AliasAnalysis &AA) { + assert(Def->getParent() == Insert->getParent()); bool SawStore = false, SawSideEffects = false; MachineBasicBlock::const_iterator D(Def), I(Insert); for (--I; I != D; --I) @@ -155,17 +156,15 @@ Def->getOpcode() == WebAssembly::ARGUMENT_F64) continue; - // Single-use expression trees require defs that have one use, or that - // they be trivially clonable. + // Single-use expression trees require defs that have one use. // TODO: Eventually we'll relax this, to take advantage of set_local // returning its result. if (!MRI.hasOneUse(Reg)) continue; - // For now, be conservative and don't look across block boundaries, - // unless we have something trivially clonable. + // For now, be conservative and don't look across block boundaries. // TODO: Be more aggressive. - if (Def->getParent() != &MBB && !Def->isMoveImmediate()) + if (Def->getParent() != &MBB) continue; // Don't move instructions that have side effects or memory dependencies Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -10,9 +10,12 @@ /// \file /// \brief This file implements an optimization pass using store result values. /// -/// WebAssembly's store instructions return the stored value, specifically to -/// enable the optimization of reducing get_local/set_local traffic, which is -/// what we're doing here. +/// WebAssembly's store instructions return the stored value. This is to enable +/// an optimization wherein uses of the stored value can be replaced by uses of +/// the store's result value, making the stored value register more likely to +/// be single-use, thus more likely to be useful to register stackifying, and +/// potentially also exposing the store to register stackifying. These both can +/// reduce get_local/set_local traffic. /// //===----------------------------------------------------------------------===// @@ -89,14 +92,22 @@ for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); - if (Where->getOpcode() == TargetOpcode::PHI) - Where = Where->getOperand(&O - &Where->getOperand(0) + 1) - .getMBB() - ->getFirstTerminator(); - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; - DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << - " from " << MI <<"\n"); + if (Where->getOpcode() == TargetOpcode::PHI) { + // PHIs use their operands on their incoming CFG edges rather than + // in their parent blocks. Get the basic block paired with this use + // of FromReg and check that MI's block dominates it. + MachineBasicBlock *Pred = + Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); + if (!MDT.dominates(&MBB, Pred)) + continue; + } else { + // For a non-PHI, check that MI dominates the instruction in the + // normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + } + DEBUG(dbgs() << "Setting operand " << O << " in " << *Where + << " from " << MI << "\n"); O.setReg(ToReg); } } Index: llvm/trunk/test/CodeGen/WebAssembly/store-results.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/store-results.ll +++ llvm/trunk/test/CodeGen/WebAssembly/store-results.ll @@ -16,3 +16,46 @@ store i32 0, i32* %p ret i32 0 } + +; Test interesting corner cases for wasm-store-results, in which the operand of +; a store ends up getting used by a phi, which needs special handling in the +; dominance test, since phis use their operands on their incoming edges. + +%class.Vec3 = type { float, float, float } + +@pos = global %class.Vec3 zeroinitializer, align 4 + +; CHECK-LABEL: foo: +; CHECK: i32.store $discard=, $pop0, $0 +define void @foo() { +for.body.i: + br label %for.body5.i + +for.body5.i: + %i.0168.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body5.i ] + %conv6.i = sitofp i32 %i.0168.i to float + store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0) + %inc.i = add nuw nsw i32 %i.0168.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 256 + br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i + +for.cond.cleanup4.i: + ret void +} + +; CHECK-LABEL: bar: +; CHECK: i32.store $discard=, $0, $pop0 +define void @bar() { +for.body.i: + br label %for.body5.i + +for.body5.i: + %i.0168.i = phi float [ 0.0, %for.body.i ], [ %inc.i, %for.body5.i ] + store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0) + %inc.i = fadd float %i.0168.i, 1.0 + %exitcond.i = fcmp oeq float %inc.i, 256.0 + br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i + +for.cond.cleanup4.i: + ret void +}