Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -16366,9 +16366,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); bool SplitStack = MF.shouldSplitStack(); - bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) || - SplitStack; SDLoc dl(Op); // Get the inputs. @@ -16382,21 +16381,45 @@ // pointer when other instructions are using the stack. Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool Is64Bit = Subtarget.is64Bit(); MVT SPTy = getPointerTy(DAG.getDataLayout()); + bool CheckStack = SplitStack; + if (!CheckStack && Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) { + // The Windows ABI requires us to probe the stack for allocations beyond + // the probe size. + if (auto *SizeC = dyn_cast(Size)) { + // Try to elide the probe if we can prove that this dynamic allocation is + // smaller than the probe size. + unsigned StackProbeSize = 4096; + if (F->hasFnAttribute("stack-probe-size")) + F->getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + unsigned AlignedAlloc = SizeC->getZExtValue(); + // Round the dynamic alloca's size up to its alignment. + if (Align) + AlignedAlloc = alignTo(AlignedAlloc, Align); + + // If the aligned allocation is smaller than the probe size, then we don't + // need to probe the stack. + CheckStack = AlignedAlloc >= StackProbeSize; + } else { + // We cannot tell how big this dynamic alloca will be, probe the stack. 
+ CheckStack = true; + } + } + SDValue Result; - if (!Lower) { + if (!CheckStack) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); - EVT VT = Node->getValueType(0); - SDValue Tmp3 = Node->getOperand(2); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); - unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value @@ -16410,8 +16433,6 @@ if (Is64Bit) { // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. - const Function *F = MF.getFunction(); - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (I->hasNestAttr()) @@ -16434,7 +16455,6 @@ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); - const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); Index: llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll +++ llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll @@ -38,8 +38,9 @@ ; CHECK: pushl %ebp ; CHECK: movl %esp, %ebp ; CHECK: subl ${{[0-9]+}}, %esp -; CHECK: movl $8, %eax -; CHECK: calll __chkstk +; CHECK: movl %esp, %[[tmp_sp1:.*]] +; CHECK: leal -8(%[[tmp_sp1]]), %[[tmp_sp2:.*]] +; CHECK: %[[tmp_sp2]], %esp ; CHECK: calll "??0A@@QAE@XZ" ; CHECK: calll "??0A@@QAE@XZ" ; CHECK: calll _takes_two Index: llvm/trunk/test/CodeGen/X86/dynamic-alloca-in-entry.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/dynamic-alloca-in-entry.ll +++ llvm/trunk/test/CodeGen/X86/dynamic-alloca-in-entry.ll @@ -15,5 +15,8 @@ ret void } ; CHECK-LABEL: _bar: -; CHECK: calll __chkstk +; CHECK: movl %esp, %ebp +; CHECK: movl %esp, %[[sp_tmp:.*]] +; CHECK: addl $-4, %[[sp_tmp]] +; CHECK: movl %[[sp_tmp]], %esp ; CHECK: retl Index: llvm/trunk/test/CodeGen/X86/inalloca-ctor.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca-ctor.ll +++ llvm/trunk/test/CodeGen/X86/inalloca-ctor.ll @@ -10,13 +10,14 @@ define void @g() { entry: +; CHECK: movl %esp, %ebp %args = alloca inalloca %frame %c = getelementptr %frame, %frame* %args, i32 0, i32 2 -; CHECK: movl $20, %eax -; CHECK: calll __chkstk -; CHECK: movl %esp, +; CHECK: movl %esp, %[[tmp_sp1:.*]] +; CHECK: leal -20(%[[tmp_sp1]]), %[[tmp_sp2:.*]] +; CHECK: movl %[[tmp_sp2]], %esp call void @Foo_ctor(%Foo* %c) -; CHECK: leal 12(%{{.*}}), +; CHECK: leal -8(%[[tmp_sp1]]), ; CHECK-NEXT: pushl ; CHECK-NEXT: calll _Foo_ctor ; CHECK: addl $4, %esp Index: llvm/trunk/test/CodeGen/X86/inalloca-invoke.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca-invoke.ll +++ llvm/trunk/test/CodeGen/X86/inalloca-invoke.ll @@ -12,6 +12,7 @@ declare void @reverse(%frame.reverse* inalloca align 4) define i32 @main() personality i32 (...)* @pers { +; CHECK: movl %esp, %ebp %temp.lvalue = alloca %Iter br label %blah @@ -21,9 +22,10 @@ %beg = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 0 %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1 -; CHECK: calll __chkstk -; CHECK: movl %esp, %[[beg:[^ ]*]] -; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]] +; CHECK: movl %esp, %[[end:.*]] +; CHECK: leal -24(%[[end]]), %[[beg:.*]] +; CHECK: movl %[[beg]], %esp +; CHECK: addl $-12, %[[end]] call void @begin(%Iter* sret 
%temp.lvalue) ; CHECK: calll _begin Index: llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll +++ llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll @@ -7,16 +7,16 @@ define void @g() { ; CHECK-LABEL: _g: +; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl $8, %eax -; CHECK: calll __chkstk +; CHECK: movl %esp, %[[tmp_sp:.*]] +; CHECK: leal -8(%[[tmp_sp]]), %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl %esp, %eax -; CHECK: movl $13, (%eax) -; CHECK: movl $42, 4(%eax) +; CHECK: movl $13, -8(%[[tmp_sp]]) +; CHECK: movl $42, -4(%[[tmp_sp]]) call x86_stdcallcc void @f(%Foo* inalloca %b) ; CHECK: calll _f@8 ; CHECK-NOT: %esp Index: llvm/trunk/test/CodeGen/X86/inalloca.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca.ll +++ llvm/trunk/test/CodeGen/X86/inalloca.ll @@ -7,16 +7,16 @@ define void @a() { ; CHECK-LABEL: _a: entry: +; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl $8, %eax -; CHECK: calll __chkstk +; CHECK: movl %esp, %[[tmp_sp:.*]] +; CHECK: leal -8(%[[tmp_sp]]), %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl %esp, %eax -; CHECK: movl $13, (%eax) -; CHECK: movl $42, 4(%eax) +; CHECK: movl $13, -8(%[[tmp_sp]]) +; CHECK: movl $42, -4(%[[tmp_sp]]) call void @f(%Foo* inalloca %b) ; CHECK: calll _f ret void @@ -27,16 +27,16 @@ define void @b() { ; CHECK-LABEL: _b: entry: +; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl $8, %eax -; CHECK: calll __chkstk +; CHECK: movl %esp, %[[tmp_sp:.*]] +; CHECK: leal -8(%[[tmp_sp]]), %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 
0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl %esp, %eax -; CHECK: movl $13, (%eax) -; CHECK: movl $42, 4(%eax) +; CHECK: movl $13, -8(%[[tmp_sp]]) +; CHECK: movl $42, -4(%[[tmp_sp]]) call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b) ; CHECK: movl $1, %eax ; CHECK: calll _inreg_with_inalloca @@ -48,16 +48,16 @@ define void @c() { ; CHECK-LABEL: _c: entry: +; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl $8, %eax -; CHECK: calll __chkstk +; CHECK: movl %esp, %[[tmp_sp:.*]] +; CHECK: leal -8(%[[tmp_sp]]), %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl %esp, %eax -; CHECK-DAG: movl $13, (%eax) -; CHECK-DAG: movl $42, 4(%eax) +; CHECK-DAG: movl $13, -8(%[[tmp_sp]]) +; CHECK-DAG: movl $42, -4(%[[tmp_sp]]) call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b) ; CHECK-DAG: xorl %ecx, %ecx ; CHECK: calll _thiscall_with_inalloca Index: llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk.ll +++ llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -9,7 +9,7 @@ %struct.S = type { [12 x i8] } -define x86_thiscallcc void @call_inalloca(i1 %x) { +define x86_thiscallcc void @call_inalloca(i1 %x) "stack-probe-size"="12" { entry: %argmem = alloca inalloca <{ %struct.S }>, align 4 %argidx1 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 0