Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -15622,54 +15622,40 @@ SplitStack; SDLoc dl(Op); + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + unsigned Align = cast(Op.getOperand(2))->getZExtValue(); + EVT VT = Node->getValueType(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + + bool Is64Bit = Subtarget->is64Bit(); + MVT SPTy = getPointerTy(DAG.getDataLayout()); + + SDValue Result; if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDNode* Node = Op.getNode(); - unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" - " not tell us which reg is the stack pointer!"); + " not tell us which reg is the stack pointer!"); EVT VT = Node->getValueType(0); - SDValue Tmp1 = SDValue(Node, 0); - SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); - SDValue Chain = Tmp1.getOperand(0); - - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), - SDLoc(Node)); - SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) - Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, dl, VT)); - Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), - DAG.getIntPtrConstant(0, dl, true), SDValue(), - SDLoc(Node)); - - SDValue Ops[2] = { Tmp1, Tmp2 }; - return DAG.getMergeValues(Ops, dl); - } - - // Get the inputs. - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - unsigned Align = cast(Op.getOperand(2))->getZExtValue(); - EVT VT = Op.getNode()->getValueType(0); - - bool Is64Bit = Subtarget->is64Bit(); - MVT SPTy = getPointerTy(DAG.getDataLayout()); - - if (SplitStack) { + Result = DAG.getNode(ISD::AND, dl, VT, Result, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain + } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { @@ -15687,10 +15673,8 @@ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); - SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, + Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); - SDValue Ops1[2] = { Value, Chain }; - return DAG.getMergeValues(Ops1, dl); } else { SDValue Flag; const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX); @@ -15712,9 +15696,14 @@ Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } - SDValue Ops1[2] = { SP, Chain }; - return DAG.getMergeValues(Ops1, dl); + Result = SP; } + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { Index: llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll +++ llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll @@ -14,8 +14,9 @@ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%esp) -; CHECK: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call x86_stdcallcc void @f(%Foo* inalloca %b) ; CHECK: calll _f@8 ; CHECK-NOT: %esp Index: llvm/trunk/test/CodeGen/X86/inalloca.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca.ll +++ llvm/trunk/test/CodeGen/X86/inalloca.ll @@ -14,8 +14,9 @@ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%esp) -; CHECK: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call void @f(%Foo* inalloca %b) ; CHECK: calll _f ret void @@ -33,8 +34,9 @@ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%esp) -; CHECK: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b) ; CHECK: movl $1, %eax ; CHECK: calll _inreg_with_inalloca @@ -53,8 +55,9 @@ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK-DAG: movl $13, (%esp) -; CHECK-DAG: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK-DAG: movl $13, (%eax) +; CHECK-DAG: movl $42, 4(%eax) call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b) ; CHECK-DAG: xorl %ecx, %ecx ; CHECK: calll _thiscall_with_inalloca Index: llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk.ll +++ llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s + +; chkstk cannot come before the usual prologue, since it adjusts ESP. + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc18.0.0" + +%struct.S = type { [12 x i8] } + +define x86_thiscallcc void @call_inalloca(i1 %x) { +entry: + %argmem = alloca inalloca <{ %struct.S }>, align 4 + %argidx1 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 0 + %argidx2 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 1 + store i8 42, i8* %argidx2, align 4 + br i1 %x, label %bb1, label %bb2 + +bb1: + store i8 42, i8* %argidx1, align 4 + br label %bb2 + +bb2: + call void @inalloca_params(<{ %struct.S }>* inalloca nonnull %argmem) + ret void +} + +; CHECK-LABEL: _call_inalloca: # @call_inalloca +; CHECK: pushl %ebp +; CHECK: movl %esp, %ebp +; CHECK: movl $12, %eax +; CHECK: calll __chkstk +; CHECK: calll _inalloca_params +; CHECK: movl %ebp, %esp +; CHECK: popl %ebp +; CHECK: retl + +declare void @inalloca_params(<{ %struct.S }>* inalloca)