Index: lib/Target/X86/X86CallFrameOptimization.cpp =================================================================== --- lib/Target/X86/X86CallFrameOptimization.cpp +++ lib/Target/X86/X86CallFrameOptimization.cpp @@ -346,15 +346,15 @@ while (I->getOpcode() == X86::LEA32r) ++I; - // We expect a copy instruction here. - // TODO: The copy instruction is a lowering artifact. - // We should also support a copy-less version, where the stack - // pointer is used directly. - if (!I->isCopy() || !I->getOperand(0).isReg()) - return; - Context.SPCopy = &*I++; - - unsigned StackPtr = Context.SPCopy->getOperand(0).getReg(); + unsigned StackPtr = RegInfo.getStackRegister(); + // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual + // register here. If it's there, use that virtual register as stack pointer + // instead. + if (I->isCopy() && I->getOperand(0).isReg() && I->getOperand(1).isReg() && + I->getOperand(1).getReg() == StackPtr) { + Context.SPCopy = &*I++; + StackPtr = Context.SPCopy->getOperand(0).getReg(); + } // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of store instructions that @@ -539,7 +539,7 @@ // The stack-pointer copy is no longer used in the call sequences. // There should not be any other users, but we can't commit to that, so: - if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg())) + if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg())) Context.SPCopy->eraseFromParent(); // Once we've done this, we need to make sure PEI doesn't assume a reserved Index: test/CodeGen/X86/fast-isel-call.ll =================================================================== --- test/CodeGen/X86/fast-isel-call.ll +++ test/CodeGen/X86/fast-isel-call.ll @@ -22,12 +22,12 @@ call void @foo2(%struct.s* byval %d ) ret void ; CHECK-LABEL: test2: -; CHECK: movl (%eax) -; CHECK: movl {{.*}}, (%esp) -; CHECK: movl 4(%eax) -; CHECK: movl {{.*}}, 4(%esp) -; CHECK: movl 8(%eax) -; CHECK: movl {{.*}}, 8(%esp) +; CHECK: movl (%eax), %[[reg1:e[a-d]x]] +; CHECK: movl 4(%eax), %[[reg2:e[a-d]x]] +; CHECK: movl 8(%eax), %[[reg3:e[a-d]x]] +; CHECK: pushl %[[reg3]] +; CHECK: pushl %[[reg2]] +; CHECK: pushl %[[reg1]] } declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind